@siteed/expo-audio-stream 1.17.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. package/CHANGELOG.md +26 -1
  2. package/README.md +1 -1
  3. package/android/src/main/java/net/siteed/audiostream/AudioAnalysisData.kt +68 -22
  4. package/android/src/main/java/net/siteed/audiostream/AudioFormatUtils.kt +24 -0
  5. package/android/src/main/java/net/siteed/audiostream/AudioProcessor.kt +836 -386
  6. package/android/src/main/java/net/siteed/audiostream/AudioRecorderManager.kt +0 -2
  7. package/android/src/main/java/net/siteed/audiostream/AudioRecordingService.kt +35 -29
  8. package/android/src/main/java/net/siteed/audiostream/ExpoAudioStreamModule.kt +236 -96
  9. package/android/src/main/java/net/siteed/audiostream/FFT.kt +55 -0
  10. package/android/src/main/java/net/siteed/audiostream/Features.kt +49 -7
  11. package/android/src/main/java/net/siteed/audiostream/RecordingConfig.kt +2 -4
  12. package/build/AudioAnalysis/AudioAnalysis.types.d.ts +55 -47
  13. package/build/AudioAnalysis/AudioAnalysis.types.d.ts.map +1 -1
  14. package/build/AudioAnalysis/AudioAnalysis.types.js.map +1 -1
  15. package/build/AudioAnalysis/extractAudioAnalysis.d.ts +60 -13
  16. package/build/AudioAnalysis/extractAudioAnalysis.d.ts.map +1 -1
  17. package/build/AudioAnalysis/extractAudioAnalysis.js +147 -162
  18. package/build/AudioAnalysis/extractAudioAnalysis.js.map +1 -1
  19. package/build/ExpoAudioStream.types.d.ts +47 -3
  20. package/build/ExpoAudioStream.types.d.ts.map +1 -1
  21. package/build/ExpoAudioStream.types.js.map +1 -1
  22. package/build/ExpoAudioStream.web.d.ts.map +1 -1
  23. package/build/ExpoAudioStream.web.js +0 -1
  24. package/build/ExpoAudioStream.web.js.map +1 -1
  25. package/build/ExpoAudioStreamModule.d.ts.map +1 -1
  26. package/build/ExpoAudioStreamModule.js +216 -12
  27. package/build/ExpoAudioStreamModule.js.map +1 -1
  28. package/build/WebRecorder.web.d.ts +67 -13
  29. package/build/WebRecorder.web.d.ts.map +1 -1
  30. package/build/WebRecorder.web.js +177 -173
  31. package/build/WebRecorder.web.js.map +1 -1
  32. package/build/index.d.ts +3 -3
  33. package/build/index.d.ts.map +1 -1
  34. package/build/index.js +2 -2
  35. package/build/index.js.map +1 -1
  36. package/build/useAudioRecorder.d.ts.map +1 -1
  37. package/build/useAudioRecorder.js +12 -8
  38. package/build/useAudioRecorder.js.map +1 -1
  39. package/build/utils/audioProcessing.d.ts +24 -0
  40. package/build/utils/audioProcessing.d.ts.map +1 -0
  41. package/build/utils/audioProcessing.js +133 -0
  42. package/build/utils/audioProcessing.js.map +1 -0
  43. package/build/workers/InlineFeaturesExtractor.web.d.ts +1 -1
  44. package/build/workers/InlineFeaturesExtractor.web.d.ts.map +1 -1
  45. package/build/workers/InlineFeaturesExtractor.web.js +694 -194
  46. package/build/workers/InlineFeaturesExtractor.web.js.map +1 -1
  47. package/build/workers/inlineAudioWebWorker.web.d.ts +1 -1
  48. package/build/workers/inlineAudioWebWorker.web.d.ts.map +1 -1
  49. package/build/workers/inlineAudioWebWorker.web.js +3 -2
  50. package/build/workers/inlineAudioWebWorker.web.js.map +1 -1
  51. package/ios/AudioAnalysisData.swift +51 -16
  52. package/ios/AudioProcessingHelpers.swift +710 -26
  53. package/ios/AudioProcessor.swift +334 -185
  54. package/ios/AudioStreamManager.swift +2 -3
  55. package/ios/DataPoint.swift +25 -12
  56. package/ios/DecodingConfig.swift +47 -0
  57. package/ios/ExpoAudioStreamModule.swift +187 -103
  58. package/ios/FFT.swift +62 -0
  59. package/ios/Features.swift +24 -3
  60. package/ios/RecordingSettings.swift +7 -7
  61. package/package.json +2 -1
  62. package/plugin/build/index.js +6 -1
  63. package/plugin/src/index.ts +9 -1
  64. package/src/AudioAnalysis/AudioAnalysis.types.ts +68 -52
  65. package/src/AudioAnalysis/extractAudioAnalysis.ts +223 -219
  66. package/src/ExpoAudioStream.types.ts +53 -7
  67. package/src/ExpoAudioStream.web.ts +0 -1
  68. package/src/ExpoAudioStreamModule.ts +255 -10
  69. package/src/WebRecorder.web.ts +231 -244
  70. package/src/index.ts +5 -3
  71. package/src/useAudioRecorder.tsx +14 -10
  72. package/src/utils/audioProcessing.ts +205 -0
  73. package/src/workers/InlineFeaturesExtractor.web.tsx +694 -194
  74. package/src/workers/inlineAudioWebWorker.web.tsx +3 -2
@@ -1,21 +1,494 @@
1
+ // packages/expo-audio-stream/src/workers/InlineFeaturesExtractor.web.tsx
1
2
  export const InlineFeaturesExtractor = `
3
+ // Constants
4
+ const N_FFT = 1024; // Default FFT size
5
+ const MAX_FFT_SIZE = 8192; // Maximum FFT size to prevent memory issues
6
+ const N_CHROMA = 12;
7
+
8
+ // FFT Implementation with normalized Hann window
9
+ function FFT(n) {
10
+ this.n = n;
11
+ this.cosTable = new Float32Array(n / 2);
12
+ this.sinTable = new Float32Array(n / 2);
13
+ this.hannWindow = new Float32Array(n);
14
+
15
+ // Match Android implementation with precomputed tables
16
+ const normalizationFactor = Math.sqrt(2.0 / n);
17
+ for (var i = 0; i < n / 2; i++) {
18
+ this.cosTable[i] = Math.cos(2.0 * Math.PI * i / n);
19
+ this.sinTable[i] = Math.sin(2.0 * Math.PI * i / n);
20
+ }
21
+
22
+ // Precompute normalized Hann window to match Android
23
+ for (var i = 0; i < n; i++) {
24
+ this.hannWindow[i] = normalizationFactor * 0.5 * (1 - Math.cos(2.0 * Math.PI * i / (n - 1)));
25
+ }
26
+ }
27
+
28
+ FFT.prototype.transform = function(data) {
29
+ const n = data.length;
30
+
31
+ // Validate input length is power of 2
32
+ if ((n & (n - 1)) !== 0) {
33
+ throw new Error('FFT length must be power of 2');
34
+ }
35
+
36
+ // Use iterative bit reversal instead of recursive
37
+ const bitReversedIndices = new Uint32Array(n);
38
+ for (let i = 0; i < n; i++) {
39
+ let reversed = 0;
40
+ let j = i;
41
+ let bits = Math.log2(n);
42
+ while (bits--) {
43
+ reversed = (reversed << 1) | (j & 1);
44
+ j >>= 1;
45
+ }
46
+ bitReversedIndices[i] = reversed;
47
+ }
48
+
49
+ // Apply bit reversal
50
+ for (let i = 0; i < n; i++) {
51
+ const j = bitReversedIndices[i];
52
+ if (i < j) {
53
+ const temp = data[i];
54
+ data[i] = data[j];
55
+ data[j] = temp;
56
+ }
57
+ }
58
+
59
+ // Iterative FFT computation with optimized memory usage
60
+ for (let step = 1; step < n; step <<= 1) {
61
+ const jump = step << 1;
62
+ const angleStep = Math.PI / step;
63
+
64
+ for (let group = 0; group < n; group += jump) {
65
+ for (let pair = group; pair < group + step; pair++) {
66
+ const match = pair + step;
67
+ const angle = angleStep * (pair - group);
68
+
69
+ const currentCos = Math.cos(angle);
70
+ const currentSin = Math.sin(angle);
71
+
72
+ const real = currentCos * data[match] - currentSin * data[match + 1];
73
+ const imag = currentCos * data[match + 1] + currentSin * data[match];
74
+
75
+ data[match] = data[pair] - real;
76
+ data[match + 1] = data[pair + 1] - imag;
77
+ data[pair] += real;
78
+ data[pair + 1] += imag;
79
+ }
80
+ }
81
+ }
82
+ };
83
+
84
+ // Add realInverse method
85
+ FFT.prototype.realInverse = function(powerSpectrum, output) {
86
+ const n = powerSpectrum.length;
87
+ const complexData = new Float32Array(n * 2);
88
+
89
+ // Copy power spectrum to complex format
90
+ for (let i = 0; i < n/2 + 1; i++) {
91
+ complexData[2 * i] = powerSpectrum[i];
92
+ if (2 * i + 1 < complexData.length) {
93
+ complexData[2 * i + 1] = 0;
94
+ }
95
+ }
96
+
97
+ // Conjugate for inverse FFT
98
+ for (let i = 0; i < n; i++) {
99
+ if (2 * i + 1 < complexData.length) {
100
+ complexData[2 * i + 1] = -complexData[2 * i + 1];
101
+ }
102
+ }
103
+
104
+ this.transform(complexData);
105
+
106
+ // Copy real part to output and scale
107
+ for (let i = 0; i < n; i++) {
108
+ output[i] = complexData[2 * i] / n;
109
+ }
110
+ };
111
+
112
+ // Add helper functions to match Android
113
+ function nextPowerOfTwo(n) {
114
+ let value = 1;
115
+ while (value < n) {
116
+ value *= 2;
117
+ }
118
+ return value;
119
+ }
120
+
121
+ function applyHannWindow(samples) {
122
+ const output = new Float32Array(samples.length);
123
+ for (let i = 0; i < samples.length; i++) {
124
+ const multiplier = 0.5 * (1 - Math.cos(2 * Math.PI * i / (samples.length - 1)));
125
+ output[i] = samples[i] * multiplier;
126
+ }
127
+ return output;
128
+ }
129
+
130
+ // Update spectral feature computation to match Android
131
+ function computeSpectralFeatures(segment, sampleRate, featureOptions = {}) {
132
+ try {
133
+ // Early return if no spectral features are requested
134
+ if (!featureOptions.spectralCentroid &&
135
+ !featureOptions.spectralFlatness &&
136
+ !featureOptions.spectralRollOff &&
137
+ !featureOptions.spectralBandwidth &&
138
+ !featureOptions.magnitudeSpectrum) {
139
+ return {
140
+ centroid: 0,
141
+ flatness: 0,
142
+ rollOff: 0,
143
+ bandwidth: 0,
144
+ magnitudeSpectrum: []
145
+ };
146
+ }
147
+
148
+ // Ensure we have valid data
149
+ if (!segment || segment.length === 0) {
150
+ throw new Error('Invalid segment data');
151
+ }
152
+
153
+ // Process in fixed-size chunks
154
+ const chunkSize = N_FFT;
155
+ const numChunks = Math.ceil(segment.length / chunkSize);
156
+
157
+ let results = {
158
+ centroid: 0,
159
+ flatness: 0,
160
+ rollOff: 0,
161
+ bandwidth: 0,
162
+ magnitudeSpectrum: new Float32Array(N_FFT / 2 + 1).fill(0)
163
+ };
164
+
165
+ let validChunks = 0;
166
+
167
+ // Iterate through chunks
168
+ for (let i = 0; i < numChunks; i++) {
169
+ const start = i * chunkSize;
170
+ const end = Math.min(start + chunkSize, segment.length);
171
+ const chunk = segment.slice(start, end);
172
+
173
+ if (chunk.length < N_FFT / 4) continue; // Skip very small chunks
174
+
175
+ // Process the chunk
176
+ const paddedChunk = new Float32Array(N_FFT);
177
+ paddedChunk.set(applyHannWindow(chunk));
178
+
179
+ const fft = new FFT(N_FFT);
180
+ fft.transform(paddedChunk);
181
+
182
+ // Calculate magnitude spectrum
183
+ const chunkMagnitudeSpectrum = new Float32Array(N_FFT / 2 + 1);
184
+ let hasSignal = false;
185
+
186
+ for (let j = 0; j < N_FFT / 2; j++) {
187
+ const re = paddedChunk[2 * j];
188
+ const im = paddedChunk[2 * j + 1];
189
+ const magnitude = Math.sqrt(re * re + im * im);
190
+ chunkMagnitudeSpectrum[j] = magnitude;
191
+ if (magnitude > Number.EPSILON) hasSignal = true;
192
+ }
193
+
194
+ if (!hasSignal) continue;
195
+ validChunks++;
196
+
197
+ // Accumulate results
198
+ if (featureOptions.spectralCentroid) {
199
+ const centroid = computeSpectralCentroid(chunkMagnitudeSpectrum, sampleRate);
200
+ if (!isNaN(centroid)) results.centroid += centroid;
201
+ }
202
+
203
+ if (featureOptions.spectralFlatness) {
204
+ const flatness = computeSpectralFlatness(chunkMagnitudeSpectrum);
205
+ if (!isNaN(flatness)) results.flatness += flatness;
206
+ }
207
+
208
+ if (featureOptions.spectralRollOff) {
209
+ const rolloff = computeSpectralRollOff(chunkMagnitudeSpectrum, sampleRate);
210
+ if (!isNaN(rolloff)) results.rollOff += rolloff;
211
+ }
212
+
213
+ if (featureOptions.spectralBandwidth && !isNaN(results.centroid)) {
214
+ const bandwidth = computeSpectralBandwidth(chunkMagnitudeSpectrum, sampleRate, results.centroid);
215
+ if (!isNaN(bandwidth)) results.bandwidth += bandwidth;
216
+ }
217
+
218
+ if (featureOptions.magnitudeSpectrum) {
219
+ for (let j = 0; j < results.magnitudeSpectrum.length; j++) {
220
+ results.magnitudeSpectrum[j] += chunkMagnitudeSpectrum[j];
221
+ }
222
+ }
223
+ }
224
+
225
+ // Average the accumulated results
226
+ if (validChunks > 0) {
227
+ results.centroid /= validChunks;
228
+ results.flatness /= validChunks;
229
+ results.rollOff /= validChunks;
230
+ results.bandwidth /= validChunks;
231
+
232
+ if (featureOptions.magnitudeSpectrum) {
233
+ for (let i = 0; i < results.magnitudeSpectrum.length; i++) {
234
+ results.magnitudeSpectrum[i] /= validChunks;
235
+ }
236
+ }
237
+ }
238
+
239
+ return results;
240
+ } catch (error) {
241
+ console.error('[Worker] Spectral feature computation error:', error);
242
+ return {
243
+ centroid: 0,
244
+ flatness: 0,
245
+ rollOff: 0,
246
+ bandwidth: 0,
247
+ magnitudeSpectrum: []
248
+ };
249
+ }
250
+ }
251
+
252
+ function computeSpectralCentroid(magnitudeSpectrum, sampleRate) {
253
+ const sum = magnitudeSpectrum.reduce((a, b) => a + (b || 0), 0);
254
+ if (sum <= Number.EPSILON) return 0;
255
+
256
+ const weightedSum = magnitudeSpectrum.reduce((acc, value, index) =>
257
+ acc + (index * (sampleRate / N_FFT) * (value || 0)), 0);
258
+
259
+ return weightedSum / sum;
260
+ }
261
+
262
+ function computeSpectralFlatness(powerSpectrum) {
263
+ // Add small epsilon to avoid log(0)
264
+ const epsilon = Number.EPSILON;
265
+ const validSpectrum = powerSpectrum.map(v => Math.max(v, epsilon));
266
+
267
+ const geometricMean = Math.exp(
268
+ validSpectrum
269
+ .map(v => Math.log(v))
270
+ .reduce((a, b) => a + b) / validSpectrum.length
271
+ );
272
+
273
+ const arithmeticMean =
274
+ validSpectrum.reduce((a, b) => a + b) / validSpectrum.length;
275
+
276
+ return geometricMean / arithmeticMean;
277
+ }
278
+
279
+ function computeSpectralRollOff(magnitudeSpectrum, sampleRate) {
280
+ const totalEnergy = magnitudeSpectrum.reduce((a, b) => a + b, 0);
281
+ const rollOffThreshold = totalEnergy * 0.85;
282
+ let cumulativeEnergy = 0;
283
+
284
+ for (let i = 0; i < magnitudeSpectrum.length; i++) {
285
+ cumulativeEnergy += magnitudeSpectrum[i];
286
+ if (cumulativeEnergy >= rollOffThreshold) {
287
+ return (i / magnitudeSpectrum.length) * (sampleRate / 2);
288
+ }
289
+ }
290
+
291
+ return 0;
292
+ }
293
+
294
+ function computeSpectralBandwidth(magnitudeSpectrum, sampleRate, centroid) {
295
+ const sum = magnitudeSpectrum.reduce((a, b) => a + (b || 0), 0);
296
+ if (sum <= Number.EPSILON) return 0;
297
+
298
+ const weightedSum = magnitudeSpectrum.reduce(
299
+ (acc, value, index) => {
300
+ const freq = index * sampleRate / (2 * magnitudeSpectrum.length);
301
+ return acc + (value || 0) * Math.pow(freq - centroid, 2);
302
+ }, 0
303
+ );
304
+
305
+ return Math.sqrt(weightedSum / sum);
306
+ }
307
+
308
+ function computeChroma(segmentData, sampleRate) {
309
+ // Ensure we have valid input data
310
+ if (!segmentData || segmentData.length === 0) {
311
+ return new Array(N_CHROMA).fill(0);
312
+ }
313
+
314
+ const fftLength = nextPowerOfTwo(Math.max(segmentData.length, N_FFT));
315
+ const windowed = applyHannWindow(segmentData);
316
+ const padded = new Float32Array(fftLength);
317
+ padded.set(windowed.slice(0, Math.min(windowed.length, fftLength)));
318
+
319
+ const fft = new FFT(fftLength);
320
+ try {
321
+ fft.transform(padded);
322
+ } catch (e) {
323
+ console.error('[Worker] FFT transform failed in chromagram:', e);
324
+ return new Array(N_CHROMA).fill(0);
325
+ }
326
+
327
+ const chroma = new Float32Array(N_CHROMA).fill(0);
328
+ const freqsPerBin = sampleRate / fftLength;
329
+ let totalEnergy = 0;
330
+
331
+ // First pass: compute magnitudes and total energy
332
+ for (let i = 0; i < fftLength / 2; i++) {
333
+ const freq = i * freqsPerBin;
334
+ if (freq > 20) { // Only consider frequencies above 20 Hz
335
+ const re = padded[2 * i];
336
+ const im = padded[2 * i + 1] || 0;
337
+ const magnitude = Math.sqrt(re * re + im * im);
338
+
339
+ if (magnitude > Number.EPSILON) {
340
+ // Use a more stable pitch class calculation
341
+ const midiNote = 69 + 12 * Math.log2(freq / 440.0);
342
+ const pitchClass = Math.round(midiNote) % 12;
343
+
344
+ if (pitchClass >= 0 && pitchClass < 12) {
345
+ chroma[pitchClass] += magnitude;
346
+ totalEnergy += magnitude;
347
+ }
348
+ }
349
+ }
350
+ }
351
+
352
+ // Normalize chroma values only if we have energy
353
+ if (totalEnergy > Number.EPSILON) {
354
+ for (let i = 0; i < N_CHROMA; i++) {
355
+ chroma[i] = chroma[i] / totalEnergy;
356
+ }
357
+ }
358
+
359
+ // Convert to regular array and ensure no NaN values
360
+ return Array.from(chroma, v => isNaN(v) ? 0 : v);
361
+ }
362
+
363
+ function extractHNR(segmentData) {
364
+ const frameSize = segmentData.length;
365
+ const autocorrelation = new Float32Array(frameSize);
366
+
367
+ // Compute the autocorrelation iteratively
368
+ for (let i = 0; i < frameSize; i++) {
369
+ let sum = 0;
370
+ for (let j = 0; j < frameSize - i; j++) {
371
+ sum += segmentData[j] * segmentData[j + i];
372
+ }
373
+ autocorrelation[i] = sum;
374
+ }
375
+
376
+ // Find the maximum autocorrelation value iteratively
377
+ let maxAutocorrelation = -Infinity;
378
+ for (let i = 1; i < autocorrelation.length; i++) {
379
+ if (autocorrelation[i] > maxAutocorrelation) {
380
+ maxAutocorrelation = autocorrelation[i];
381
+ }
382
+ }
383
+
384
+ // Compute the HNR
385
+ return autocorrelation[0] !== 0
386
+ ? 10 * Math.log10(maxAutocorrelation / (autocorrelation[0] - maxAutocorrelation))
387
+ : 0;
388
+ }
389
+
390
+ function estimatePitch(segment, sampleRate) {
391
+ // Early validation
392
+ if (!segment || segment.length < 2 || !sampleRate) return 0;
393
+
394
+ try {
395
+ // Apply Hann window
396
+ const windowed = applyHannWindow(segment);
397
+
398
+ // Pad for FFT
399
+ const fftLength = nextPowerOfTwo(segment.length * 2);
400
+ const padded = new Float32Array(fftLength);
401
+ padded.set(windowed);
402
+
403
+ // Perform FFT
404
+ const fft = new FFT(fftLength);
405
+ fft.transform(padded);
406
+
407
+ // Compute power spectrum
408
+ const powerSpectrum = new Float32Array(fftLength / 2 + 1);
409
+ for (let i = 0; i <= fftLength / 2; i++) {
410
+ const re = padded[2 * i];
411
+ const im = padded[2 * i + 1] || 0;
412
+ powerSpectrum[i] = re * re + im * im;
413
+ }
414
+
415
+ // Find peak frequency
416
+ let maxPower = 0;
417
+ let peakIndex = 0;
418
+ const minFreq = 50; // Minimum frequency to consider (Hz)
419
+ const maxFreq = 1000; // Maximum frequency to consider (Hz)
420
+ const minBin = Math.floor(minFreq * fftLength / sampleRate);
421
+ const maxBin = Math.ceil(maxFreq * fftLength / sampleRate);
422
+
423
+ for (let i = minBin; i <= maxBin; i++) {
424
+ if (powerSpectrum[i] > maxPower) {
425
+ maxPower = powerSpectrum[i];
426
+ peakIndex = i;
427
+ }
428
+ }
429
+
430
+ // Convert peak index to frequency
431
+ const fundamentalFreq = peakIndex * sampleRate / fftLength;
432
+
433
+ // Return 0 if the detected frequency is outside reasonable bounds
434
+ return (fundamentalFreq >= minFreq && fundamentalFreq <= maxFreq) ?
435
+ fundamentalFreq : 0;
436
+
437
+ } catch (error) {
438
+ console.error('[Worker] Pitch estimation error:', error);
439
+ return 0;
440
+ }
441
+ }
442
+
2
443
  // Unique ID counter
3
444
  let uniqueIdCounter = 0
4
- let accumulatedDataPoints = [] // Move outside message handler
5
- let lastEmitTime = Date.now() // Move outside message handler
445
+ let accumulatedDataPoints = []
446
+ let lastEmitTime = Date.now()
6
447
 
7
448
  self.onmessage = function (event) {
449
+ // Check if this is a reset command
450
+ if (event.data.command === 'resetCounter') {
451
+ uniqueIdCounter = event.data.startCounterFrom || 0;
452
+ console.log('[Worker] Reset counter to', uniqueIdCounter);
453
+ return; // Exit early, don't process audio
454
+ }
455
+
456
+ // Regular audio processing
8
457
  const {
9
- channelData, // this is only the newly recorded data when live recording.
458
+ channelData,
10
459
  sampleRate,
11
- pointsPerSecond,
460
+ segmentDurationMs,
12
461
  algorithm,
13
462
  bitDepth,
14
463
  fullAudioDurationMs,
15
464
  numberOfChannels,
16
465
  features: _features,
17
- intervalAnalysis = 500, // Use intervalAnalysis instead of interval
466
+ intervalAnalysis = 500,
467
+ enableLogging,
468
+ resetCounter,
469
+ startCounterFrom,
18
470
  } = event.data
471
+
472
+ // Also handle reset as part of regular message
473
+ if (resetCounter) {
474
+ uniqueIdCounter = startCounterFrom || 0;
475
+ }
476
+
477
+ const subChunkStartTime = fullAudioDurationMs / 1000
478
+
479
+
480
+ // Create a simple logger that only logs when enabled
481
+ const logger = enableLogging ? {
482
+ debug: (...args) => console.debug('[Worker]', ...args),
483
+ log: (...args) => console.log('[Worker]', ...args),
484
+ error: (...args) => console.error('[Worker]', ...args)
485
+ } : {
486
+ debug: () => {},
487
+ log: () => {},
488
+ error: () => {}
489
+ }
490
+ console.log('[Worker] START Feature Extractor - hasData: ' + (event.data ? true : false) + ', channelData: ' + (event.data.channelData ? event.data.channelData.length : 0) + ', fullAudioDurationMs: ' + (event.data.fullAudioDurationMs || 0) + ', sampleRate: ' + (event.data.sampleRate || 0) + ', segmentDurationMs: ' + (event.data.segmentDurationMs || 0) + ', algorithm: ' + (event.data.algorithm || 'none') + ', bitDepth: ' + (event.data.bitDepth || 0) + ', numberOfChannels: ' + (event.data.numberOfChannels || 0) + ', features: ' + (event.data.features ? Object.keys(event.data.features).length : 0) + ', intervalAnalysis: ' + (event.data.intervalAnalysis || 0) + ', dataKeys: ' + (event.data ? Object.keys(event.data).join(',') : ''));
491
+
19
492
  const features = _features || {}
20
493
 
21
494
  const SILENCE_THRESHOLD = 0.01
@@ -89,205 +562,228 @@ self.onmessage = function (event) {
89
562
  return [] // TODO implement
90
563
  }
91
564
 
92
- const extractHNR = (segmentData) => {
93
- const frameSize = segmentData.length
94
- const autocorrelation = new Float32Array(frameSize)
565
+ /**
566
+ * Creates a features object based on requested features
567
+ */
568
+ function createFeaturesObject(
569
+ features,
570
+ maxAmp,
571
+ rms,
572
+ sumSquares,
573
+ zeroCrossings,
574
+ remainingSamples,
575
+ spectralFeatures,
576
+ channelData,
577
+ startIdx,
578
+ endIdx,
579
+ sampleRate
580
+ ) {
581
+ // If no features are requested, return undefined
582
+ if (!Object.values(features).some(function(v) { return v; })) {
583
+ return undefined;
584
+ }
95
585
 
96
- // Compute the autocorrelation of the segment data
97
- for (let i = 0; i < frameSize; i++) {
98
- let sum = 0
99
- for (let j = 0; j < frameSize - i; j++) {
100
- sum += segmentData[j] * segmentData[j + i]
101
- }
102
- autocorrelation[i] = sum
586
+ const result = {};
587
+
588
+ if (features.energy) {
589
+ result.energy = sumSquares;
103
590
  }
591
+ if (features.rms) {
592
+ result.rms = rms;
593
+ }
594
+ // Always include min/max amplitude if any features are requested
595
+ result.minAmplitude = -maxAmp;
596
+ result.maxAmplitude = maxAmp;
597
+
598
+ if (features.zcr) {
599
+ result.zcr = zeroCrossings / remainingSamples;
600
+ }
601
+ if (features.spectralCentroid) {
602
+ result.spectralCentroid = spectralFeatures.centroid;
603
+ }
604
+ if (features.spectralFlatness) {
605
+ result.spectralFlatness = spectralFeatures.flatness;
606
+ }
607
+ if (features.spectralRolloff) {
608
+ result.spectralRolloff = spectralFeatures.rollOff;
609
+ }
610
+ if (features.spectralBandwidth) {
611
+ result.spectralBandwidth = spectralFeatures.bandwidth;
612
+ }
613
+ if (features.chromagram) {
614
+ result.chromagram = computeChroma(channelData.slice(startIdx, endIdx), sampleRate);
615
+ }
616
+ if (features.hnr) {
617
+ result.hnr = extractHNR(channelData.slice(startIdx, endIdx));
618
+ }
619
+ if (features.pitch) {
620
+ result.pitch = estimatePitch(channelData.slice(startIdx, endIdx), sampleRate);
621
+ }
622
+
623
+ return result;
624
+ }
104
625
 
105
- // Find the maximum autocorrelation value (excluding the zero lag)
106
- const maxAutocorrelation = Math.max(...autocorrelation.subarray(1))
626
+ function extractWaveform(
627
+ channelData,
628
+ sampleRate,
629
+ segmentDurationMs
630
+ ) {
631
+ const logger = enableLogging ? {
632
+ debug: (...args) => console.debug('[Worker]', ...args),
633
+ log: (...args) => console.log('[Worker]', ...args),
634
+ error: (...args) => console.error('[Worker]', ...args)
635
+ } : {
636
+ debug: () => {},
637
+ log: () => {},
638
+ error: () => {}
639
+ }
107
640
 
108
- // Compute the HNR
109
- return autocorrelation[0] !== 0
110
- ? 10 *
111
- Math.log10(
112
- maxAutocorrelation /
113
- (autocorrelation[0] - maxAutocorrelation)
114
- )
115
- : 0
116
- }
641
+ // Calculate amplitude range
642
+ let min = Infinity
643
+ let max = -Infinity
644
+ for (let i = 0; i < channelData.length; i++) {
645
+ min = Math.min(min, channelData[i])
646
+ max = Math.max(max, channelData[i])
647
+ }
117
648
 
118
- const extractWaveform = (
119
- channelData, // Float32Array
120
- sampleRate, // number
121
- pointsPerSecond, // number
122
- algorithm // string
123
- ) => {
124
649
  const totalSamples = channelData.length
125
- const segmentDuration = totalSamples / sampleRate
126
- const totalPoints = Math.max(
127
- Math.ceil(segmentDuration * pointsPerSecond),
128
- 1
129
- )
130
- const pointInterval = Math.ceil(totalSamples / totalPoints)
131
- const dataPoints = []
132
- let minAmplitude = Infinity
133
- let maxAmplitude = -Infinity
134
- let silenceStart = null
135
- let lastSpeechEnd = -Infinity
136
- let isSpeech = false
137
-
138
- const expectedPoints = segmentDuration * pointsPerSecond
139
- const samplesPerPoint = Math.ceil(channelData.length / expectedPoints)
650
+ const durationMs = (totalSamples / sampleRate) * 1000
651
+
652
+ // Calculate fixed segment sizes
653
+ const samplesPerSegment = Math.floor(sampleRate * (segmentDurationMs / 1000));
654
+ const numPoints = Math.floor(totalSamples / samplesPerSegment);
655
+ const remainingSamples = totalSamples % samplesPerSegment;
140
656
 
141
- for (let i = 0; i < expectedPoints; i++) {
142
- const start = i * samplesPerPoint
143
- const end = Math.min(start + samplesPerPoint, totalSamples)
657
+ const dataPoints = []
144
658
 
659
+ // Process full segments
660
+ for (let i = 0; i < numPoints; i++) {
661
+ const startIdx = i * samplesPerSegment
662
+ const endIdx = startIdx + samplesPerSegment
663
+
145
664
  let sumSquares = 0
665
+ let maxAmp = 0
146
666
  let zeroCrossings = 0
147
- let prevValue = channelData[start]
148
- let localMinAmplitude = Infinity
149
- let localMaxAmplitude = -Infinity
150
- let hasNonZeroValue = false
151
667
 
152
- // compute values for the segment
153
- for (let j = start; j < end; j++) {
668
+ // Calculate segment features
669
+ for (let j = startIdx; j < endIdx; j++) {
154
670
  const value = channelData[j]
155
671
  sumSquares += value * value
156
- if (j > start && value * prevValue < 0) {
672
+ maxAmp = Math.max(maxAmp, Math.abs(value))
673
+ if (j > 0 && value * channelData[j - 1] < 0) {
157
674
  zeroCrossings++
158
675
  }
159
- prevValue = value
160
-
161
- // We need to keep absolute value otherwise we cannot visualize properly
162
- const absValue = Math.abs(value)
163
- localMinAmplitude = Math.min(localMinAmplitude, absValue)
164
- localMaxAmplitude = Math.max(localMaxAmplitude, absValue)
165
-
166
- if (value !== 0) {
167
- hasNonZeroValue = true
168
- }
169
676
  }
170
677
 
171
- // Post-processing checks
172
- if (!hasNonZeroValue) {
173
- // All values are zero
174
- localMinAmplitude = 0
175
- localMaxAmplitude = 0
678
+ const rms = Math.sqrt(sumSquares / samplesPerSegment)
679
+ const startTime = subChunkStartTime + (startIdx / sampleRate)
680
+ const endTime = subChunkStartTime + (endIdx / sampleRate)
681
+
682
+ var spectralFeatures = computeSpectralFeatures(channelData.slice(startIdx, endIdx), sampleRate, features);
683
+
684
+ const dataPoint = {
685
+ id: uniqueIdCounter++,
686
+ amplitude: maxAmp,
687
+ rms,
688
+ startTime,
689
+ endTime,
690
+ dB: 20 * Math.log10(rms + 1e-6),
691
+ silent: rms < 0.01,
692
+ startPosition: startIdx * 2,
693
+ endPosition: endIdx * 2,
694
+ samples: samplesPerSegment,
176
695
  }
177
696
 
178
- const rms = Math.sqrt(sumSquares / (end - start))
179
- minAmplitude = Math.min(minAmplitude, localMinAmplitude)
180
- maxAmplitude = Math.max(maxAmplitude, localMaxAmplitude)
697
+ // Extract features if any are requested
698
+ const extractedFeatures = createFeaturesObject(
699
+ features,
700
+ maxAmp,
701
+ rms,
702
+ sumSquares,
703
+ zeroCrossings,
704
+ samplesPerSegment,
705
+ spectralFeatures,
706
+ channelData,
707
+ startIdx,
708
+ endIdx,
709
+ sampleRate
710
+ );
711
+
712
+ if (extractedFeatures) {
713
+ dataPoint.features = extractedFeatures;
714
+ }
181
715
 
182
- const energy = sumSquares
183
- const zcr = zeroCrossings / (end - start)
716
+ dataPoints.push(dataPoint)
717
+ }
184
718
 
185
- const silent = rms < SILENCE_THRESHOLD
186
- const dB = 20 * Math.log10(rms)
719
+ // Handle remaining samples if they exist and are enough to process
720
+ if (remainingSamples > samplesPerSegment / 4) { // Only process if we have at least 1/4 of a segment
721
+ const startIdx = numPoints * samplesPerSegment
722
+ const endIdx = totalSamples
723
+
724
+ let sumSquares = 0
725
+ let maxAmp = 0
726
+ let zeroCrossings = 0
187
727
 
188
- if (silent) {
189
- if (silenceStart === null) {
190
- silenceStart = start
191
- } else if (start - silenceStart > MIN_SILENCE_DURATION) {
192
- // Silence detected for longer than the threshold, set amplitude to 0
193
- localMaxAmplitude = 0
194
- localMinAmplitude = 0
195
- isSpeech = false
196
- }
197
- } else {
198
- silenceStart = null
199
- if (
200
- !isSpeech &&
201
- start - lastSpeechEnd < SPEECH_INERTIA_DURATION
202
- ) {
203
- isSpeech = true
728
+ for (let j = startIdx; j < endIdx; j++) {
729
+ const value = channelData[j]
730
+ sumSquares += value * value
731
+ maxAmp = Math.max(maxAmp, Math.abs(value))
732
+ if (j > 0 && value * channelData[j - 1] < 0) {
733
+ zeroCrossings++
204
734
  }
205
- lastSpeechEnd = end
206
735
  }
207
736
 
208
- const activeSpeech =
209
- (rms > RMS_THRESHOLD && zcr > ZCR_THRESHOLD) ||
210
- (isSpeech && start - lastSpeechEnd < SPEECH_INERTIA_DURATION)
211
-
212
- if (activeSpeech) {
213
- isSpeech = true
214
- lastSpeechEnd = end
215
- } else {
216
- isSpeech = false
737
+ const rms = Math.sqrt(sumSquares / remainingSamples)
738
+ const startTime = startIdx / sampleRate;
739
+ const endTime = endIdx / sampleRate;
740
+
741
+ var spectralFeatures = computeSpectralFeatures(channelData.slice(startIdx, endIdx), sampleRate, features);
742
+
743
+ const dataPoint = {
744
+ id: uniqueIdCounter++,
745
+ amplitude: maxAmp,
746
+ rms,
747
+ startTime,
748
+ endTime,
749
+ dB: 20 * Math.log10(rms + 1e-6),
750
+ silent: rms < 0.01,
751
+ startPosition: startIdx * 2,
752
+ endPosition: endIdx * 2,
753
+ samples: remainingSamples,
217
754
  }
218
755
 
219
- const bytesPerSample = bitDepth / 8
220
- const startPosition = start * bytesPerSample * numberOfChannels // Calculate start position in bytes
221
- const endPosition = end * bytesPerSample * numberOfChannels // Calculate end position in bytes
222
-
223
- // Compute features
224
- const segmentData = channelData.slice(start, end)
225
- const mfcc = features.mfcc
226
- ? extractMFCC(segmentData, sampleRate)
227
- : []
228
- const spectralCentroid = features.spectralCentroid
229
- ? extractSpectralCentroid(segmentData, sampleRate)
230
- : 0
231
- const spectralFlatness = features.spectralFlatness
232
- ? extractSpectralFlatness(segmentData)
233
- : 0
234
- const spectralRollOff = features.spectralRollOff
235
- ? extractSpectralRollOff(segmentData, sampleRate)
236
- : 0
237
- const spectralBandwidth = features.spectralBandwidth
238
- ? extractSpectralBandwidth(segmentData, sampleRate)
239
- : 0
240
- const chromagram = features.chromagram
241
- ? extractChromagram(segmentData, sampleRate)
242
- : []
243
- const hnr = features.hnr ? extractHNR(segmentData) : 0
244
-
245
- const peakAmp = Math.max(Math.abs(localMaxAmplitude), Math.abs(localMinAmplitude))
246
- const newData = {
247
- id: uniqueIdCounter++, // Assign unique ID and increment the counter
248
- amplitude: algorithm === 'peak' ? peakAmp : rms,
249
- activeSpeech,
250
- dB,
251
- silent,
252
- features: {
253
- energy,
254
- rms,
255
- minAmplitude: localMinAmplitude,
256
- maxAmplitude: localMaxAmplitude,
257
- zcr,
258
- mfcc: [], // Placeholder for MFCC features
259
- spectralCentroid, // Computed spectral centroid
260
- spectralFlatness, // Computed spectral flatness
261
- spectralRollOff, // Computed spectral roll-off
262
- spectralBandwidth, // Computed spectral bandwidth
263
- chromagram, // Computed chromagram
264
- hnr, // Computed HNR
265
- },
266
- startTime: start / sampleRate,
267
- endTime: end / sampleRate,
268
- startPosition,
269
- endPosition,
270
- samples: end - start,
271
- speaker: 0, // Assuming speaker detection is to be handled later
756
+ // Extract features if any are requested
757
+ const extractedFeatures = createFeaturesObject(
758
+ features,
759
+ maxAmp,
760
+ rms,
761
+ sumSquares,
762
+ zeroCrossings,
763
+ remainingSamples,
764
+ spectralFeatures,
765
+ channelData,
766
+ startIdx,
767
+ endIdx,
768
+ sampleRate
769
+ );
770
+
771
+ if (extractedFeatures) {
772
+ dataPoint.features = extractedFeatures;
272
773
  }
273
774
 
274
- dataPoints.push(newData)
775
+ dataPoints.push(dataPoint)
275
776
  }
276
777
 
277
778
  return {
278
- pointsPerSecond,
279
- amplitudeAlgorithm: algorithm,
280
- durationMs: fullAudioDurationMs,
281
- bitDepth,
282
- samples: totalSamples,
283
- numberOfChannels,
284
- sampleRate,
779
+ durationMs,
285
780
  dataPoints,
286
- amplitudeRange: {
287
- min: minAmplitude,
288
- max: maxAmplitude,
781
+ amplitudeRange: { min, max },
782
+ rmsRange: {
783
+ min: 0,
784
+ max: Math.max(Math.abs(min), Math.abs(max))
289
785
  },
290
- speakerChanges: [], // Placeholder for future speaker detection logic
786
+ extractionTimeMs: Date.now() - lastEmitTime
291
787
  }
292
788
  }
293
789
 
@@ -295,33 +791,37 @@ self.onmessage = function (event) {
295
791
  const result = extractWaveform(
296
792
  channelData,
297
793
  sampleRate,
298
- pointsPerSecond,
299
- algorithm
794
+ segmentDurationMs
300
795
  )
301
796
 
302
- // Accumulate data points
303
- accumulatedDataPoints = accumulatedDataPoints.concat(result.dataPoints)
304
-
305
- const currentTime = Date.now()
306
- const shouldEmitAccumulated = currentTime - lastEmitTime >= intervalAnalysis
307
-
308
- if (shouldEmitAccumulated) {
309
- self.postMessage({
310
- command: 'features',
311
- result: {
312
- ...result,
313
- dataPoints: accumulatedDataPoints
314
- }
315
- })
316
- accumulatedDataPoints = [] // Reset accumulator
317
- lastEmitTime = currentTime
318
- }
797
+ // Send complete result immediately
798
+ self.postMessage({
799
+ command: 'features',
800
+ result: {
801
+ bitDepth,
802
+ samples: channelData.length,
803
+ numberOfChannels,
804
+ sampleRate,
805
+ segmentDurationMs,
806
+ durationMs: result.durationMs,
807
+ dataPoints: result.dataPoints,
808
+ amplitudeRange: result.amplitudeRange,
809
+ rmsRange: result.rmsRange,
810
+ }
811
+ })
319
812
  } catch (error) {
320
- console.error('[AudioFeaturesExtractor] Error in processing', error)
321
- self.postMessage({ error: error.message })
322
- } finally {
323
- // Do not close the worker so it can be re-used for subsequent messages
324
- // self.close();
813
+ console.error('[Worker] Error', {
814
+ message: error.message,
815
+ stack: error.stack
816
+ });
817
+
818
+ self.postMessage({
819
+ error: {
820
+ message: error.message,
821
+ stack: error.stack,
822
+ name: error.name
823
+ }
824
+ });
325
825
  }
326
826
  }
327
827
  `;