npm - @siteed/expo-audio-stream - Versions diffs - 1.17.0 → 2.0.1 - Mend

@siteed/expo-audio-stream 1.17.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (74)

package/CHANGELOG.md +26 -1
package/README.md +1 -1
package/android/src/main/java/net/siteed/audiostream/AudioAnalysisData.kt +68 -22
package/android/src/main/java/net/siteed/audiostream/AudioFormatUtils.kt +24 -0
package/android/src/main/java/net/siteed/audiostream/AudioProcessor.kt +836 -386
package/android/src/main/java/net/siteed/audiostream/AudioRecorderManager.kt +0 -2
package/android/src/main/java/net/siteed/audiostream/AudioRecordingService.kt +35 -29
package/android/src/main/java/net/siteed/audiostream/ExpoAudioStreamModule.kt +236 -96
package/android/src/main/java/net/siteed/audiostream/FFT.kt +55 -0
package/android/src/main/java/net/siteed/audiostream/Features.kt +49 -7
package/android/src/main/java/net/siteed/audiostream/RecordingConfig.kt +2 -4
package/build/AudioAnalysis/AudioAnalysis.types.d.ts +55 -47
package/build/AudioAnalysis/AudioAnalysis.types.d.ts.map +1 -1
package/build/AudioAnalysis/AudioAnalysis.types.js.map +1 -1
package/build/AudioAnalysis/extractAudioAnalysis.d.ts +60 -13
package/build/AudioAnalysis/extractAudioAnalysis.d.ts.map +1 -1
package/build/AudioAnalysis/extractAudioAnalysis.js +147 -162
package/build/AudioAnalysis/extractAudioAnalysis.js.map +1 -1
package/build/ExpoAudioStream.types.d.ts +47 -3
package/build/ExpoAudioStream.types.d.ts.map +1 -1
package/build/ExpoAudioStream.types.js.map +1 -1
package/build/ExpoAudioStream.web.d.ts.map +1 -1
package/build/ExpoAudioStream.web.js +0 -1
package/build/ExpoAudioStream.web.js.map +1 -1
package/build/ExpoAudioStreamModule.d.ts.map +1 -1
package/build/ExpoAudioStreamModule.js +216 -12
package/build/ExpoAudioStreamModule.js.map +1 -1
package/build/WebRecorder.web.d.ts +67 -13
package/build/WebRecorder.web.d.ts.map +1 -1
package/build/WebRecorder.web.js +177 -173
package/build/WebRecorder.web.js.map +1 -1
package/build/index.d.ts +3 -3
package/build/index.d.ts.map +1 -1
package/build/index.js +2 -2
package/build/index.js.map +1 -1
package/build/useAudioRecorder.d.ts.map +1 -1
package/build/useAudioRecorder.js +12 -8
package/build/useAudioRecorder.js.map +1 -1
package/build/utils/audioProcessing.d.ts +24 -0
package/build/utils/audioProcessing.d.ts.map +1 -0
package/build/utils/audioProcessing.js +133 -0
package/build/utils/audioProcessing.js.map +1 -0
package/build/workers/InlineFeaturesExtractor.web.d.ts +1 -1
package/build/workers/InlineFeaturesExtractor.web.d.ts.map +1 -1
package/build/workers/InlineFeaturesExtractor.web.js +694 -194
package/build/workers/InlineFeaturesExtractor.web.js.map +1 -1
package/build/workers/inlineAudioWebWorker.web.d.ts +1 -1
package/build/workers/inlineAudioWebWorker.web.d.ts.map +1 -1
package/build/workers/inlineAudioWebWorker.web.js +3 -2
package/build/workers/inlineAudioWebWorker.web.js.map +1 -1
package/ios/AudioAnalysisData.swift +51 -16
package/ios/AudioProcessingHelpers.swift +710 -26
package/ios/AudioProcessor.swift +334 -185
package/ios/AudioStreamManager.swift +2 -3
package/ios/DataPoint.swift +25 -12
package/ios/DecodingConfig.swift +47 -0
package/ios/ExpoAudioStreamModule.swift +187 -103
package/ios/FFT.swift +62 -0
package/ios/Features.swift +24 -3
package/ios/RecordingSettings.swift +7 -7
package/package.json +2 -1
package/plugin/build/index.js +6 -1
package/plugin/src/index.ts +9 -1
package/src/AudioAnalysis/AudioAnalysis.types.ts +68 -52
package/src/AudioAnalysis/extractAudioAnalysis.ts +223 -219
package/src/ExpoAudioStream.types.ts +53 -7
package/src/ExpoAudioStream.web.ts +0 -1
package/src/ExpoAudioStreamModule.ts +255 -10
package/src/WebRecorder.web.ts +231 -244
package/src/index.ts +5 -3
package/src/useAudioRecorder.tsx +14 -10
package/src/utils/audioProcessing.ts +205 -0
package/src/workers/InlineFeaturesExtractor.web.tsx +694 -194
package/src/workers/inlineAudioWebWorker.web.tsx +3 -2

package/android/src/main/java/net/siteed/audiostream/AudioProcessor.kt CHANGED Viewed

@@ -1,4 +1,4 @@
-// net/siteed/audiostream/AudioProcessor.kt
+// packages/expo-audio-stream/android/src/main/java/net/siteed/audiostream/AudioProcessor.kt
 package net.siteed.audiostream
 import java.nio.ByteBuffer
@@ -6,15 +6,14 @@ import java.nio.ByteOrder
 import kotlin.math.*
 import android.util.Log
 import java.io.File
-import java.io.IOException
 import java.util.concurrent.atomic.AtomicLong
 import kotlin.system.measureTimeMillis
 import android.media.MediaExtractor
 import android.media.MediaFormat
 import android.media.MediaCodec
 import java.io.FileInputStream
-import java.nio.channels.FileChannel
 import java.io.RandomAccessFile
+import java.util.zip.CRC32
 data class DecodingConfig(
     val targetSampleRate: Int? = null,     // Optional target sample rate
@@ -25,13 +24,9 @@ data class DecodingConfig(
 class AudioProcessor(private val filesDir: File) {
     companion object {
-        const val NUM_MFCC_COEFFICIENTS = 13
-        const val NUM_MEL_FILTERS = 26
-        const val MEL_MIN_FREQ = 0.0
-        const val MEL_MAX_FREQ_DIVISOR = 2595.0
-        const val MEL_MAX_FREQ_CONSTANT = 700.0
         const val DCT_SQRT_DIVISOR = 2.0
-        const val LOG_BASE = 10.0
+        private const val N_FFT = 1024
+        private const val N_CHROMA = 12
         private val uniqueIdCounter = AtomicLong(0L) // Keep as companion object property to maintain during pause/resume cycles
@@ -45,7 +40,7 @@ class AudioProcessor(private val filesDir: File) {
     private var cumulativeMinAmplitude = Float.MAX_VALUE
     private var cumulativeMaxAmplitude = Float.NEGATIVE_INFINITY
-    fun loadAudioFile(filePath: String, debug: Boolean = false): AudioData? {
+    private fun loadAudioFile(filePath: String): AudioData? {
         try {
             val fileUri = filePath.removePrefix("file://")
             Log.d("AudioProcessor", "Processing WAV file: $fileUri")
@@ -66,10 +61,6 @@ class AudioProcessor(private val filesDir: File) {
                 return null
             }
-            // Read file size (4 bytes little-endian)
-            val fileSizeBytes = ByteArray(4).apply { raf.readFully(this) }
-            val expectedFileSize = ByteBuffer.wrap(fileSizeBytes).order(ByteOrder.LITTLE_ENDIAN).int + 8L
             // Read WAVE header
             val waveHeader = ByteArray(4).apply { raf.readFully(this) }
             if (String(waveHeader) != "WAVE") {
@@ -180,18 +171,6 @@ class AudioProcessor(private val filesDir: File) {
         }
     }
-    private fun byteArrayToInt(bytes: ByteArray): Int {
-        return (bytes[0].toInt() and 0xFF) or
-                ((bytes[1].toInt() and 0xFF) shl 8) or
-                ((bytes[2].toInt() and 0xFF) shl 16) or
-                ((bytes[3].toInt() and 0xFF) shl 24)
-    }
-    private fun byteArrayToShort(bytes: ByteArray): Short {
-        return (bytes[0].toInt() and 0xFF or
-                (bytes[1].toInt() and 0xFF shl 8)).toShort()
-    }
     /**
      * Processes the audio data and extracts features.
      * @param data The audio data in bytes.
@@ -199,6 +178,22 @@ class AudioProcessor(private val filesDir: File) {
      * @return AudioAnalysisData containing the extracted features.
      */
     fun processAudioData(data: ByteArray, config: RecordingConfig): AudioAnalysisData {
+        if (data.isEmpty()) {
+            Log.e("AudioProcessor", "Received empty audio data")
+            return AudioAnalysisData(
+                segmentDurationMs = config.segmentDurationMs,
+                durationMs = 0,
+                bitDepth = 16,
+                numberOfChannels = config.channels,
+                sampleRate = config.sampleRate,
+                samples = 0,
+                dataPoints = emptyList(),
+                amplitudeRange = AudioAnalysisData.AmplitudeRange(0f, 0f),
+                rmsRange = AudioAnalysisData.AmplitudeRange(0f, 0f),
+                extractionTimeMs = 0f,
+            )
+        }
         val sampleRate = config.sampleRate.toFloat()
         val bitDepth = when (config.encoding) {
             "pcm_8bit" -> 8
@@ -207,34 +202,33 @@ class AudioProcessor(private val filesDir: File) {
             else -> throw IllegalArgumentException("Unsupported encoding: ${config.encoding}")
         }
         val channelData = convertToFloatArray(data, bitDepth)
-        val pointsPerSecond = config.pointsPerSecond
-        val algorithm = config.algorithm
         val featureOptions = config.features
         val totalSamples = channelData.size
-        val segmentDurationSeconds = totalSamples.toDouble() / sampleRate
-        val totalPoints = max((segmentDurationSeconds * pointsPerSecond).toInt(), 1)
-        val pointInterval = ceil(totalSamples / totalPoints.toDouble()).toInt()
-        Log.d("AudioProcessor", "Extracting waveform totalSize=${data.size} with $totalSamples samples and $pointsPerSecond points per second --> $pointInterval samples per point")
-        Log.d("AudioProcessor", "segmentDuration: $segmentDurationSeconds seconds")
+        // Update samplesPerSegment calculation to use proper formula
+        val samplesPerSegment = ((config.segmentDurationMs / 1000.0) * sampleRate).toInt()
+        val totalPoints = ceil(totalSamples.toDouble() / samplesPerSegment).toInt()
+        Log.d("AudioProcessor", "Extracting waveform totalSize=${data.size} with $totalSamples samples --> $totalPoints points")
+        Log.d("AudioProcessor", "segmentDuration: ${config.segmentDurationMs}ms, samplesPerSegment: $samplesPerSegment")
-        val expectedPoints = segmentDurationSeconds * pointsPerSecond
-        val samplesPerPoint = ceil(channelData.size / expectedPoints).toInt()
-        Log.d("AudioProcessor", "Extracting waveform with expectedPoints=$expectedPoints , samplesPerPoints=$samplesPerPoint")
+        // Remove expectedPoints calculation since it used pointsPerSecond
+        val samplesPerPoint = ceil(channelData.size / totalPoints.toDouble()).toInt()
+        Log.d("AudioProcessor", "Extracting waveform with samplesPerPoints=$samplesPerPoint")
         val dataPoints = mutableListOf<DataPoint>()
         var minAmplitude = Float.MAX_VALUE
         var maxAmplitude = Float.NEGATIVE_INFINITY
-        val durationMs = (segmentDurationSeconds * 1000).toInt()
+        var minRms = Float.MAX_VALUE
+        var maxRms = Float.NEGATIVE_INFINITY
+         // Calculate total duration in milliseconds based on sample rate and total samples
+        val durationMs = (totalSamples.toFloat() / sampleRate * 1000).toInt()
         // Measure the time taken for audio processing
         val extractionTimeMs = measureTimeMillis {
-            var currentPosition = 0 // Track the current byte position
             for (i in 0 until totalPoints) {
-                val start = i * samplesPerPoint
-                val end = min(start + samplesPerPoint, totalSamples)
+                val start = i * samplesPerSegment
+                val end = min(start + samplesPerSegment, totalSamples)
                 val segmentData = channelData.sliceArray(start until end)
                 var sumSquares = 0f
@@ -253,12 +247,23 @@ class AudioProcessor(private val filesDir: File) {
                     localMaxAmplitude = max(localMaxAmplitude, absValue)
                 }
-                val features = computeFeatures(segmentData, sampleRate, minAmplitude, maxAmplitude, sumSquares, zeroCrossings, segmentData.size, featureOptions)
+                val features = computeFeatures(
+                    segmentData = segmentData,
+                    sampleRate = sampleRate,
+                    sumSquares = sumSquares,
+                    zeroCrossings = zeroCrossings,
+                    segmentLength = segmentData.size,
+                    featureOptions = featureOptions,
+                    minAmplitude = localMinAmplitude,
+                    maxAmplitude = localMaxAmplitude
+                )
                 val rms = features.rms
                 val silent = rms < 0.01
-                val dB = if (featureOptions["dB"] == true) 20 * log10(rms.toDouble()).toFloat() else 0f
+                val dB = 20 * log10(rms.toDouble()).toFloat()
                 minAmplitude = min(minAmplitude, localMinAmplitude)
                 maxAmplitude = max(maxAmplitude, localMaxAmplitude)
+                minRms = min(minRms, rms)
+                maxRms = max(maxRms, rms)
                 val bytesPerSample = bitDepth / 8
                 val startPosition = start * bytesPerSample * config.channels
@@ -269,18 +274,18 @@ class AudioProcessor(private val filesDir: File) {
                 cumulativeMaxAmplitude = max(cumulativeMaxAmplitude, localMaxAmplitude)
                 val dataPoint = DataPoint(
-                    id = uniqueIdCounter.getAndIncrement(), // Assign unique ID and increment the counter
-                    amplitude = if (algorithm == "peak") localMaxAmplitude else rms,
-                    activeSpeech = null,
+                    id = uniqueIdCounter.getAndIncrement(),
+                    amplitude = localMaxAmplitude,  // Always use peak amplitude
+                    rms = rms,                      // Always include RMS
                     dB = dB,
                     silent = silent,
                     features = features,
-                    samples = segmentData.size,
+                    speech = SpeechFeatures(isActive = !silent),
                     startTime = startPosition / (sampleRate * bytesPerSample * config.channels),
                     endTime = endPosition / (sampleRate * bytesPerSample * config.channels),
                     startPosition = startPosition,
                     endPosition = endPosition,
-                    speaker = 0
+                    samples = segmentData.size
                 )
                 dataPoints.add(dataPoint)
@@ -288,16 +293,16 @@ class AudioProcessor(private val filesDir: File) {
         }
         return AudioAnalysisData(
-            pointsPerSecond = pointsPerSecond,
+            segmentDurationMs = config.segmentDurationMs,
             durationMs = durationMs,
             bitDepth = bitDepth,
             numberOfChannels = config.channels,
-            sampleRate = config.sampleRate,
-            samples = totalSamples,
+            sampleRate = config.sampleRate,  // Use config.sampleRate instead of sampleRate
+            samples = totalSamples,          // Use totalSamples instead of samplesInRange
             dataPoints = dataPoints,
-            amplitudeRange = AudioAnalysisData.AmplitudeRange(cumulativeMinAmplitude, cumulativeMaxAmplitude),
-            speakerChanges = emptyList(),
-            extractionTimeMs = extractionTimeMs.toFloat() // Return the measured extraction time
+            amplitudeRange = AudioAnalysisData.AmplitudeRange(minAmplitude, maxAmplitude),
+            rmsRange = AudioAnalysisData.AmplitudeRange(minRms, maxRms),
+            extractionTimeMs = extractionTimeMs.toFloat()
         )
     }
@@ -358,45 +363,33 @@ class AudioProcessor(private val filesDir: File) {
         val zcr = if (featureOptions["zcr"] == true) zeroCrossings / segmentLength.toFloat() else 0f
         val mfcc = try {
-            if (featureOptions["mfcc"] == true) extractMFCC(segmentData, sampleRate) else emptyList()
+            if (featureOptions["mfcc"] == true) computeMFCC(segmentData, sampleRate) else emptyList()
         } catch (e: Exception) {
             Log.e("AudioProcessor", "Failed to extract MFCC: ${e.message}", e)
             emptyList()
         }
-        val spectralCentroid = try {
-            if (featureOptions["spectralCentroid"] == true) extractSpectralCentroid(segmentData, sampleRate) else 0f
-        } catch (e: Exception) {
-            Log.e("AudioProcessor", "Failed to extract spectral centroid: ${e.message}", e)
-            0f
-        }
-        val spectralFlatness = try {
-            if (featureOptions["spectralFlatness"] == true) extractSpectralFlatness(segmentData) else 0f
-        } catch (e: Exception) {
-            Log.e("AudioProcessor", "Failed to extract spectral flatness: ${e.message}", e)
-            0f
-        }
-        val spectralRollOff = try {
-            if (featureOptions["spectralRollOff"] == true) extractSpectralRollOff(segmentData, sampleRate) else 0f
+        val melSpectrogram = try {
+            if (featureOptions["melSpectrogram"] == true) computeMelSpectrogram(segmentData, sampleRate) else emptyList()
         } catch (e: Exception) {
-            Log.e("AudioProcessor", "Failed to extract spectral roll-off: ${e.message}", e)
-            0f
+            Log.e("AudioProcessor", "Failed to compute mel spectrogram: ${e.message}", e)
+            emptyList()
         }
-        val spectralBandwidth = try {
-            if (featureOptions["spectralBandwidth"] == true) extractSpectralBandwidth(segmentData, sampleRate) else 0f
+        val chroma = try {
+            if (featureOptions["chromagram"] == true) computeChroma(segmentData, sampleRate) else emptyList()
         } catch (e: Exception) {
-            Log.e("AudioProcessor", "Failed to extract spectral bandwidth: ${e.message}", e)
-            0f
+            Log.e("AudioProcessor", "Failed to compute chroma: ${e.message}", e)
+            emptyList()
         }
-        val chromagram = try {
-            if (featureOptions["chromagram"] == true) extractChromagram(segmentData, sampleRate) else emptyList()
-        } catch (e: Exception) {
-            Log.e("AudioProcessor", "Failed to extract chromagram: ${e.message}", e)
-            emptyList()
+        val spectralFeatures = if (featureOptions["spectralCentroid"] == true ||
+                                 featureOptions["spectralFlatness"] == true ||
+                                 featureOptions["spectralRollOff"] == true ||
+                                 featureOptions["spectralBandwidth"] == true) {
+            extractSpectralFeatures(segmentData, sampleRate)
+        } else {
+            SpectralFeatures()
         }
         val tempo = try {
@@ -413,23 +406,220 @@ class AudioProcessor(private val filesDir: File) {
             0f
         }
+        val spectralContrast = try {
+            if (featureOptions["spectralContrast"] == true) computeSpectralContrast(segmentData, sampleRate) else emptyList()
+        } catch (e: Exception) {
+            Log.e("AudioProcessor", "Failed to compute spectral contrast: ${e.message}", e)
+            emptyList()
+        }
+        val tonnetz = try {
+            if (featureOptions["tonnetz"] == true) computeTonnetz(segmentData, sampleRate) else emptyList()
+        } catch (e: Exception) {
+            Log.e("AudioProcessor", "Failed to compute tonnetz: ${e.message}", e)
+            emptyList()
+        }
+        val pitch = if (featureOptions["pitch"] == true) estimatePitch(segmentData, sampleRate) else 0.0f
+        val crc32Value = if (featureOptions["crc32"] == true) {
+            val byteBuffer = ByteBuffer.allocate(segmentData.size * 4)
+                .order(ByteOrder.LITTLE_ENDIAN)
+            segmentData.forEach { value ->
+                byteBuffer.putFloat(value)
+            }
+            val crc32 = CRC32()
+            crc32.update(byteBuffer.array())
+            crc32.value
+        } else null
         return Features(
             energy = energy,
             mfcc = mfcc,
             rms = rms,
-            zcr = zcr,
             minAmplitude = minAmplitude,
             maxAmplitude = maxAmplitude,
-            spectralCentroid = spectralCentroid,
-            spectralFlatness = spectralFlatness,
-            spectralRollOff = spectralRollOff,
-            spectralBandwidth = spectralBandwidth,
-            chromagram = chromagram,
+            zcr = zcr,
+            spectralCentroid = spectralFeatures.centroid,
+            spectralFlatness = spectralFeatures.flatness,
+            spectralRollOff = spectralFeatures.rollOff,
+            spectralBandwidth = spectralFeatures.bandwidth,
             tempo = tempo,
-            hnr = hnr
+            hnr = hnr,
+            melSpectrogram = melSpectrogram,
+            chromagram = chroma,
+            spectralContrast = spectralContrast,
+            tonnetz = tonnetz,
+            pitch = pitch,
+            crc32 = crc32Value
+        )
+    }
+    private fun extractTempo(segmentData: FloatArray, sampleRate: Float): Float { // Tempo estimate from the spacing of spectral-flux peaks (beats per minute if sampleRate is samples per second); returns 120f when fewer than two peaks are found
+        val hopLength = 512 // samples advanced between successive frames
+        val frameLength = 2048 // samples per frame; also the size passed to FFT(...)
+        // Compute onset strength signal using spectral flux
+        val onsetEnvelope = mutableListOf<Float>()
+        var previousSpectrum = FloatArray(frameLength / 2) // zero-filled, so the first frame's flux is the sum of its magnitudes
+        // Process frames with spectral flux
+        for (i in 0 until segmentData.size - frameLength step hopLength) { // empty range when segmentData.size <= frameLength, leaving no onset values
+            val frame = segmentData.slice(i until minOf(i + frameLength, segmentData.size)).toFloatArray() // inside this loop i + frameLength < segmentData.size, so the frame is always frameLength long
+            val fft = FFT(frameLength) // NOTE(review): a new FFT is constructed for every frame; it looks loop-invariant - confirm against FFT.kt before hoisting
+            val fftData = frame.copyOf(frameLength)
+            fft.realForward(fftData) // result is read back from fftData below, i.e. the transform is expected to be in place
+            // Compute magnitude spectrum
+            val magnitudes = FloatArray(frameLength / 2)
+            for (j in magnitudes.indices) {
+                val re = fftData[2 * j] // NOTE(review): assumes realForward leaves interleaved (re, im) pairs - confirm the output layout in FFT.kt
+                val im = if (2 * j + 1 < fftData.size) fftData[2 * j + 1] else 0f // 2 * j + 1 is at most frameLength - 1, so the else branch is never taken
+                magnitudes[j] = sqrt(re * re + im * im)
+            }
+            // Calculate spectral flux (sum of positive differences)
+            var flux = 0f
+            for (j in magnitudes.indices) {
+                flux += maxOf(magnitudes[j] - previousSpectrum[j], 0f) // only increases relative to the previous frame contribute
+            }
+            onsetEnvelope.add(flux)
+            previousSpectrum = magnitudes
+        }
+        // Find peaks in onset envelope
+        val peaks = mutableListOf<Int>()
+        for (i in 1 until onsetEnvelope.size - 1) { // strict local maxima only; the first and last frames are never counted as peaks
+            if (onsetEnvelope[i] > onsetEnvelope[i-1] && onsetEnvelope[i] > onsetEnvelope[i+1]) {
+                peaks.add(i)
+            }
+        }
+        // Calculate tempo from peak intervals
+        return if (peaks.size > 1) {
+            val intervals = peaks.zipWithNext { a, b -> b - a } // spacing between consecutive peaks, measured in frames
+            val averageInterval = intervals.average().toFloat()
+            60f * sampleRate / (hopLength * averageInterval) // hopLength * averageInterval is the mean peak spacing in samples
+        } else {
+            120f // Fallback value returned when fewer than two peaks were found
+        }
+    }
+    private fun extractSpectralFeatures(samples: FloatArray, sampleRate: Float): SpectralFeatures { // Computes centroid, flatness, roll-off and bandwidth from one N_FFT-point spectrum of the truncated or zero-padded input
+        // FFT requires a fixed-size buffer (N_FFT). If our input is larger,
+        // we'll analyze just the first N_FFT samples to prevent buffer overflow.
+        // This is a common practice in audio analysis where we process chunks
+        // of consistent size rather than variable-length segments.
+        val windowed = if (samples.size > N_FFT) {
+            // If samples are larger than FFT size, take the first N_FFT samples
+            applyHannWindow(samples.copyOf(N_FFT)) // samples beyond the first N_FFT do not influence the result
+        } else {
+            applyHannWindow(samples) // window spans the actual input length; zero padding happens afterwards
+        }
+        // Create padded array for FFT, ensuring we don't exceed N_FFT size
+        // Zero padding is automatic since FloatArray initializes with zeros
+        val paddedSamples = FloatArray(N_FFT).also { padded ->
+            windowed.copyInto(padded, 0, 0, minOf(windowed.size, N_FFT))
+        }
+        // Perform FFT
+        val fft = FFT(N_FFT)
+        fft.realForward(paddedSamples) // result is read back from paddedSamples below, i.e. the transform is expected to be in place
+        // Calculate magnitude spectrum (only need first half due to symmetry)
+        // Add 1 to include both DC (0 Hz) and Nyquist frequency components
+        val magnitudeSpectrum = FloatArray(N_FFT / 2 + 1)
+        for (i in 0 until N_FFT / 2) {  // i < N_FFT / 2, so 2 * i + 1 <= N_FFT - 1 and both reads below stay in bounds
+            val re = paddedSamples[2 * i]
+            val im = paddedSamples[2 * i + 1]  // NOTE(review): for i == 0 this reads paddedSamples[1], which the code below treats as the Nyquist term, so bin 0 may mix DC and Nyquist - confirm FFT.realForward's output layout
+            magnitudeSpectrum[i] = sqrt(re * re + im * im)
+        }
+        // Handle Nyquist frequency component separately
+        magnitudeSpectrum[N_FFT / 2] = abs(paddedSamples[1]) // presumably a packed layout with the Nyquist real part stored at index 1 - verify against FFT.kt
+        // Compute power spectrum for spectral flatness
+        val powerSpectrum = magnitudeSpectrum.map { it * it }.toFloatArray()
+        // Compute spectral features
+        val centroid = computeSpectralCentroid(magnitudeSpectrum, sampleRate)
+        val flatness = computeSpectralFlatness(powerSpectrum)
+        val rollOff = computeSpectralRollOff(magnitudeSpectrum, sampleRate)
+        val bandwidth = computeSpectralBandwidth(magnitudeSpectrum, sampleRate, centroid) // spread is measured around the centroid computed above
+        return SpectralFeatures(
+            centroid = centroid,
+            flatness = flatness,
+            rollOff = rollOff,
+            bandwidth = bandwidth
         )
     }
+    private fun computeSpectralCentroid(magnitudeSpectrum: FloatArray, sampleRate: Float): Float { // Magnitude-weighted mean frequency of the spectrum
+        val sum = magnitudeSpectrum.sum()
+        if (sum == 0f) return 0f // all-zero (or empty) spectrum: return 0f instead of dividing by zero
+        val weightedSum = magnitudeSpectrum.mapIndexed { index, value ->
+            index * (sampleRate / N_FFT) * value // bin index -> frequency using a bin width of sampleRate / N_FFT
+        }.sum()
+        return weightedSum / sum
+    }
+    private fun computeSpectralFlatness(powerSpectrum: FloatArray): Float { // Ratio of the geometric mean to the arithmetic mean of powerSpectrum; an empty array yields NaN (0f / 0)
+        // Calculate geometric mean using log-space to avoid numerical issues
+        var sumLogValues = 0.0f
+        for (value in powerSpectrum) {
+            sumLogValues += ln(value + 1e-10f) // Add small epsilon to avoid log(0)
+        }
+        val geometricMean = exp(sumLogValues / powerSpectrum.size) // exp(mean of logs) == geometric mean of (value + epsilon)
+        // Calculate arithmetic mean
+        val arithmeticMean = powerSpectrum.sum() / powerSpectrum.size // computed without the epsilon used above
+        return if (arithmeticMean != 0f) geometricMean / arithmeticMean else 0f // all-zero spectrum returns 0f
+    }
+    private fun computeSpectralRollOff(magnitudeSpectrum: FloatArray, sampleRate: Float): Float { // Frequency of the first bin at which the running sum of magnitudes reaches 85% of the total
+        val totalEnergy = magnitudeSpectrum.sum() // despite the name, this sums the values as passed in (no squaring here)
+        var cumulativeEnergy = 0f
+        val rollOffThreshold = totalEnergy * 0.85f
+        for ((index, value) in magnitudeSpectrum.withIndex()) {
+            cumulativeEnergy += value
+            if (cumulativeEnergy >= rollOffThreshold) { // an all-zero spectrum satisfies this at index 0 and returns 0f
+                return index * (sampleRate / N_FFT) // bin index -> frequency using a bin width of sampleRate / N_FFT
+            }
+        }
+        return 0f // reached only if no bin met the threshold, e.g. an empty spectrum
+    }
+    private fun computeSpectralBandwidth( // Magnitude-weighted standard deviation of bin frequency around the supplied centroid
+        magnitudeSpectrum: FloatArray,
+        sampleRate: Float,
+        centroid: Float
+    ): Float {
+        val sum = magnitudeSpectrum.sum()
+        if (sum == 0f) return 0f // all-zero (or empty) spectrum: return 0f instead of dividing by zero
+        // Match iOS frequency calculation
+        val weightedSum = magnitudeSpectrum.mapIndexed { index, value ->
+            val freq = index * sampleRate / (2 * magnitudeSpectrum.size) // NOTE(review): bin width here is sampleRate / (2 * size), while computeSpectralCentroid uses sampleRate / N_FFT; for the N_FFT / 2 + 1 element spectrum built in extractSpectralFeatures these differ (N_FFT + 2 vs N_FFT) - confirm which is intended
+            value * (freq - centroid).pow(2)
+        }.sum()
+        return sqrt(weightedSum / sum)
+    }
+    private data class SpectralFeatures( // Holder for the four values returned by extractSpectralFeatures; every field defaults to 0f
+        val centroid: Float = 0f, // result of computeSpectralCentroid
+        val flatness: Float = 0f, // result of computeSpectralFlatness
+        val rollOff: Float = 0f, // result of computeSpectralRollOff
+        val bandwidth: Float = 0f // result of computeSpectralBandwidth
+    )
     /**
      * Resets the segment data.
      * @param sumSquaresUpdater Function to reset sum of squares.
@@ -453,45 +643,38 @@ class AudioProcessor(private val filesDir: File) {
     }
     /**
-     * Extracts the MFCC (Mel-Frequency Cepstral Coefficients) from the audio data.
-     * @param segmentData The segment data.
-     * @param sampleRate The sample rate of the audio data.
-     * @return The MFCC coefficients.
+     * Computes the MFCC (Mel-Frequency Cepstral Coefficients) from the audio data.
      */
-    private fun extractMFCC(segmentData: FloatArray, sampleRate: Float): List<Float> {
-        if (segmentData.size < 2) {
-            Log.e("AudioProcessor", "Segment data is too small for MFCC extraction: size=${segmentData.size}")
-            return emptyList()
-        }
-        val fftData = segmentData.copyOf()
-        val fft = FFT(fftData.size)
-        fft.realForward(fftData)
+    private fun computeMFCC(samples: FloatArray, sampleRate: Float): List<Float> {
+        val (powerSpectrum, _) = prepareFFT(samples, sampleRate)
+        val melFilters = computeMelFilterbank(
+            numFilters = 26,
+            powerSpectrumSize = powerSpectrum.size,
+            sampleRate = sampleRate
+        )
-        // Compute the power spectrum
-        val powerSpectrum = try {
-            fftData.map { it * it }.chunked(2) { (re, im) -> sqrt(re + im) }
-        } catch (e: Exception) {
-            Log.e("AudioProcessor", "Error computing power spectrum: ${e.message}", e)
+        if (melFilters.any { it.size != powerSpectrum.size }) {
+            Log.e("AudioProcessor", "Mel filter size (${melFilters[0].size}) does not match power spectrum size (${powerSpectrum.size})")
             return emptyList()
         }
-        // Compute Mel filter bank
-        val melFilterBank = computeMelFilterBank(NUM_MEL_FILTERS, powerSpectrum.size, sampleRate)
-        val filterEnergies = melFilterBank.map { filter ->
-            filter.zip(powerSpectrum).sumOf { (f, p) -> (f * p).toDouble() }.toFloat()
+        val melEnergies = FloatArray(26) { i ->
+            var energy = 0f
+            for (j in powerSpectrum.indices) {
+                energy += powerSpectrum[j] * melFilters[i][j]
+            }
+            ln(maxOf(energy, 1e-10f))
         }
-        // Apply log to filter energies
-        val logEnergies = filterEnergies.map { ln(it + Float.MIN_VALUE) }
-        // Compute Discrete Cosine Transform (DCT) of log energies to get MFCCs
-        return try {
-            computeDCT(logEnergies, NUM_MFCC_COEFFICIENTS)
-        } catch (e: Exception) {
-            Log.e("AudioProcessor", "Error computing DCT: ${e.message}", e)
-            emptyList()
+        val mfcc = FloatArray(13) { i ->
+            var sum = 0f
+            for (j in melEnergies.indices) {
+                sum += melEnergies[j] * cos(PI * i * (2 * j + 1) / (2 * 26)).toFloat()
+            }
+            sum * sqrt(2f / 26)
         }
+        return mfcc.toList()
     }
     /**
@@ -501,32 +684,53 @@ class AudioProcessor(private val filesDir: File) {
      * @param sampleRate The sample rate of the audio data.
      * @return A list of Mel filters.
      */
-    private fun computeMelFilterBank(numFilters: Int, powerSpectrumSize: Int, sampleRate: Float): List<List<Float>> {
-        val melFilters = mutableListOf<List<Float>>()
-        val melMaxFreq = MEL_MAX_FREQ_DIVISOR * log10(1.0 + sampleRate / 2.0 / MEL_MAX_FREQ_CONSTANT)
-        val melPoints = DoubleArray(numFilters + 2) { i ->
-            MEL_MIN_FREQ + i * (melMaxFreq - MEL_MIN_FREQ) / (numFilters + 1)
+    private fun computeMelFilterbank(numFilters: Int, powerSpectrumSize: Int, sampleRate: Float): Array<FloatArray> {
+        val fMin = 0f
+        val fMax = sampleRate / 2
+        // Convert Hz to Mel
+        val melMin = hzToMel(fMin)
+        val melMax = hzToMel(fMax)
+        // Create equally spaced points in Mel scale
+        val melPoints = FloatArray(numFilters + 2)
+        val melStep = (melMax - melMin) / (numFilters + 1)
+        for (i in melPoints.indices) {
+            melPoints[i] = melMin + i * melStep
         }
-        val hzPoints = melPoints.map { MEL_MAX_FREQ_CONSTANT * (LOG_BASE.pow(it / MEL_MAX_FREQ_DIVISOR) - 1.0) }
-        val bin = hzPoints.map { it * (powerSpectrumSize - 1) / sampleRate }
+        // Convert back to Hz
+        val hzPoints = melPoints.map { melToHz(it) }
+        // Convert to FFT bin numbers, clamping to valid range
+        val bins = hzPoints.map { minOf((it * powerSpectrumSize / sampleRate).roundToInt(), powerSpectrumSize - 1) }.toList()
+        // Create the filterbank matrix with size matching powerSpectrumSize
+        val filterbank = Array(numFilters) { FloatArray(powerSpectrumSize) { 0f } }
-        for (i in 1..numFilters) {
-            val filter = FloatArray(powerSpectrumSize)
-            for (j in bin[i - 1].toInt() until bin[i].toInt()) {
-                if (j >= 0 && j < filter.size) {
-                    filter[j] = ((j - bin[i - 1]) / (bin[i] - bin[i - 1])).toFloat()
+        // Ensure safe access to bins by limiting the loop and checking boundaries
+        for (i in 0 until numFilters) {
+            if (i + 2 < bins.size) { // Check to prevent out-of-bounds access
+                val startBin = bins[i]
+                val centerBin = bins[i + 1]
+                val endBin = bins[i + 2]
+                // Left slope (ascending triangle)
+                if (centerBin > startBin) {
+                    for (j in startBin until centerBin) {
+                        filterbank[i][j] = (j - startBin).toFloat() / (centerBin - startBin).toFloat()
+                    }
                 }
-            }
-            for (j in bin[i].toInt() until bin[i + 1].toInt()) {
-                if (j >= 0 && j < filter.size) {
-                    filter[j] = ((bin[i + 1] - j) / (bin[i + 1] - bin[i])).toFloat()
+                // Right slope (descending triangle)
+                if (endBin > centerBin) {
+                    for (j in centerBin until endBin) {
+                        filterbank[i][j] = (endBin - j).toFloat() / (endBin - centerBin).toFloat()
+                    }
                 }
             }
-            melFilters.add(filter.toList())
         }
-        return melFilters
+        return filterbank
     }
     /**
@@ -550,168 +754,11 @@ class AudioProcessor(private val filesDir: File) {
         return dct.toList()
     }
-    /**
-     * Extracts the spectral centroid from the audio data.
-     * @param segmentData The segment data.
-     * @param sampleRate The sample rate of the audio data.
-     * @return The spectral centroid.
-     */
-    private fun extractSpectralCentroid(segmentData: FloatArray, sampleRate: Float): Float {
-        val magnitudeSpectrum = segmentData.map { it * it }.toFloatArray()
-        val sum = magnitudeSpectrum.sum()
-        if (sum == 0f) return 0f
-        val weightedSum = magnitudeSpectrum.mapIndexed { index, value -> index * value }.sum()
-        return (weightedSum / sum) * (sampleRate / 2) / magnitudeSpectrum.size
-    }
-    /**
-     * Extracts the spectral flatness from the audio data.
-     * @param segmentData The segment data.
-     * @return The spectral flatness.
-     */
-    private fun extractSpectralFlatness(segmentData: FloatArray): Float {
-        val magnitudeSpectrum = segmentData.map { abs(it) }
-        val geometricMean = exp(magnitudeSpectrum.map { ln(it + Float.MIN_VALUE) }.average()).toFloat()
-        val arithmeticMean = magnitudeSpectrum.average().toFloat()
-        return if (arithmeticMean != 0f) geometricMean / arithmeticMean else 0f
-    }
-    /**
-     * Extracts the spectral roll-off from the audio data.
-     * @param segmentData The segment data.
-     * @param sampleRate The sample rate of the audio data.
-     * @return The spectral roll-off.
-     */
-    private fun extractSpectralRollOff(segmentData: FloatArray, sampleRate: Float): Float {
-        val magnitudeSpectrum = segmentData.map { abs(it) }
-        val totalEnergy = magnitudeSpectrum.sum()
-        var cumulativeEnergy = 0f
-        val rollOffThreshold = totalEnergy * 0.85f
-        for ((index, value) in magnitudeSpectrum.withIndex()) {
-            cumulativeEnergy += value
-            if (cumulativeEnergy >= rollOffThreshold) {
-                return index.toFloat() / magnitudeSpectrum.size * (sampleRate / 2)
-            }
-        }
-        return 0f
-    }
-    /**
-     * Extracts the spectral bandwidth from the audio data.
-     * @param segmentData The segment data.
-     * @param sampleRate The sample rate of the audio data.
-     * @return The spectral bandwidth.
-     */
-    private fun extractSpectralBandwidth(segmentData: FloatArray, sampleRate: Float): Float {
-        val centroid = extractSpectralCentroid(segmentData, sampleRate)
-        val magnitudeSpectrum = segmentData.map { abs(it) }
-        val sum = magnitudeSpectrum.sum()
-        if (sum == 0f) return 0f
-        val weightedSum = magnitudeSpectrum.mapIndexed { index, value -> value * (index - centroid).pow(2) }.sum()
-        return sqrt(weightedSum / sum)
-    }
-    /**
-     * Extracts the chromagram from the audio data.
-     * @param segmentData The segment data.
-     * @param sampleRate The sample rate of the audio data.
-     * @return The chromagram.
-     */
-    private fun extractChromagram(segmentData: FloatArray, sampleRate: Float): List<Float> {
-        val fftData = segmentData.copyOf()
-        val fft = FFT(fftData.size)
-        fft.realForward(fftData)
-        // Compute the magnitude spectrum
-        val magnitudeSpectrum = fftData.map { abs(it) }
-        // Initialize the chromagram with 12 bins (one for each pitch class)
-        val chromagram = FloatArray(12)
-        // Map frequencies to pitch classes
-        for (i in magnitudeSpectrum.indices) {
-            val freq = i * sampleRate / magnitudeSpectrum.size
-            val pitchClass = (12 * log2(freq / 440.0) % 12).toInt()
-            if (pitchClass in 0..11) {
-                chromagram[pitchClass] += magnitudeSpectrum[i]
-            }
-        }
-        return chromagram.toList()
-    }
-    /**
-     * Extracts the tempo from the audio data.
-     * @param segmentData The segment data.
-     * @param sampleRate The sample rate of the audio data.
-     * @return The tempo.
-     */
-    private fun extractTempo(segmentData: FloatArray, sampleRate: Float): Float {
-        // Calculate the onset strength envelope
-        val onsetEnv = calculateOnsetEnvelope(segmentData, sampleRate)
-        // Find peaks in the onset envelope
-        val peaks = findPeaks(onsetEnv)
-        // Calculate the inter-onset intervals (IOIs)
-        val iois = peaks.zipWithNext { a, b -> (b - a).toFloat() / sampleRate }
-        // Calculate the tempo in beats per minute (BPM)
-        val avgIoi = iois.average().toFloat()
-        return if (avgIoi != 0f) 60f / avgIoi else 0f
-    }
-    /**
-     * Calculates the onset envelope of the audio signal.
-     * @param segmentData The segment data.
-     * @param sampleRate The sample rate of the audio data.
-     * @return The onset envelope.
-     */
-    private fun calculateOnsetEnvelope(segmentData: FloatArray, sampleRate: Float): FloatArray {
-        val frameSize = sampleRate.toInt() / 100 // Assume 10ms frames
-        val onsetEnv = FloatArray(segmentData.size / frameSize)
-        var previousSpectrum = FloatArray(frameSize)
-        for (i in onsetEnv.indices) {
-            val frame = segmentData.sliceArray(i * frameSize until min((i + 1) * frameSize, segmentData.size))
-            val magnitudeSpectrum = frame.map { abs(it) }.toFloatArray()
-            val onset = magnitudeSpectrum.zip(previousSpectrum) { a, b -> max(0f, a - b) }.sum()
-            onsetEnv[i] = onset
-            previousSpectrum = magnitudeSpectrum
-        }
-        return onsetEnv
-    }
-    /**
-     * Finds the peaks in the onset envelope.
-     * @param onsetEnv The onset envelope.
-     * @return A list of peak indices.
-     */
-    private fun findPeaks(onsetEnv: FloatArray): List<Int> {
-        val peaks = mutableListOf<Int>()
-        for (i in 1 until onsetEnv.size - 1) {
-            if (onsetEnv[i] > onsetEnv[i - 1] && onsetEnv[i] > onsetEnv[i + 1]) {
-                peaks.add(i)
-            }
-        }
-        return peaks
-    }
     /**
      * Extracts the HNR (Harmonics-to-Noise Ratio) from the audio data.
      * @param segmentData The segment data.
      * @return The HNR.
      */
-    /**
-     * Extracts the HNR (Harmonics-to-Noise Ratio) from the audio data.
-     * @param segmentData The segment data as FloatArray.
-     * @return The HNR.
-     */
     private fun extractHNR(segmentData: FloatArray): Float {
         val frameSize = segmentData.size
         val autocorrelation = FloatArray(frameSize)
@@ -725,11 +772,33 @@ class AudioProcessor(private val filesDir: File) {
             autocorrelation[i] = sum
         }
-        // Find the maximum autocorrelation value (excluding the zero lag)
-        val maxAutocorrelation = autocorrelation.drop(1).maxOrNull() ?: 0f
+        // Find peaks with minimum prominence
+        val maxAutocorrelation = autocorrelation.maxOrNull() ?: 0f
+        val peaks = findPeaks(autocorrelation, minProminence = 0.1f * maxAutocorrelation)
+        if (peaks.isNotEmpty()) {
+            val firstPeakIndex = peaks.firstOrNull { it > 0 } ?: 0
+            val harmonicEnergy = autocorrelation[firstPeakIndex]
+            val noiseEnergy = autocorrelation[0] - harmonicEnergy
+            if (noiseEnergy > 0) {
+                return 10 * log10(harmonicEnergy / noiseEnergy)
+            }
+        }
+        return 0f
+    }
-        // Compute the HNR
-        return if (autocorrelation[0] != 0f) 10 * log10(maxAutocorrelation / (autocorrelation[0] - maxAutocorrelation)) else 0f
+    /**
+     * Finds local maxima in [data] that stand out from both direct neighbours.
+     *
+     * An interior index counts as a peak when its value is strictly greater than the values
+     * immediately before and after it, and exceeds the larger of those two neighbours by at
+     * least [minProminence]. The first and last indices are never reported.
+     *
+     * @param data The series to scan.
+     * @param minProminence Minimum amount by which a peak must exceed its larger direct neighbour.
+     * @return The indices of the accepted peaks, in ascending order.
+     */
+    private fun findPeaks(data: FloatArray, minProminence: Float): List<Int> {
+        // Only interior indices are candidates; arrays shorter than 3 yield an empty range.
+        return (1 until data.size - 1).filter { idx ->
+            val left = data[idx - 1]
+            val right = data[idx + 1]
+            val value = data[idx]
+            value > left && value > right && value - maxOf(left, right) >= minProminence
+        }
     }
     fun loadAudioFromAnyFormat(fileUri: String, decodingConfig: DecodingConfig? = null): AudioData? {
@@ -799,7 +868,7 @@ class AudioProcessor(private val filesDir: File) {
         // If MediaExtractor failed and file is WAV, try WAV parser
         if (file.name.lowercase().endsWith(".wav")) {
             Log.d("AudioProcessor", "Falling back to WAV parser")
-            return loadAudioFile(file.absolutePath, false)?.let { wavData ->
+            return loadAudioFile(file.absolutePath)?.let { wavData ->
                 if (decodingConfig != null) {
                     val processedData = processAudio(
                         wavData.data,
@@ -987,7 +1056,7 @@ class AudioProcessor(private val filesDir: File) {
         val inputBuffer = ByteBuffer.wrap(pcmData).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer()
         val outputBuffer = ByteBuffer.wrap(result).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer()
-        for (i in 0 until result.size) {
+        for (i in result.indices) {
             val channelData = ShortArray(targetChannels)
             for (j in 0 until targetChannels) {
                 channelData[j] = inputBuffer.get()
@@ -1076,6 +1145,8 @@ class AudioProcessor(private val filesDir: File) {
         val dataPoints = mutableListOf<DataPoint>()
         var minAmplitude = Float.MAX_VALUE
         var maxAmplitude = Float.MIN_VALUE
+        var minRms = Float.MAX_VALUE      // Add minRms
+        var maxRms = Float.MIN_VALUE      // Add maxRms
         val extractionTimeMs = measureTimeMillis {
             for (i in 0 until numberOfPoints) {
@@ -1098,22 +1169,27 @@ class AudioProcessor(private val filesDir: File) {
                     val startTimePoint = ((pointStartSample * 1000L) / (audioData.sampleRate * audioData.channels)).toFloat()
                     val endTimePoint = ((pointEndSample * 1000L) / (audioData.sampleRate * audioData.channels)).toFloat()
-                    val amplitude = when (config.algorithm.lowercase()) {
-                        "peak" -> segmentData.maxOf { abs(it) }
-                        else -> sqrt(segmentData.map { it * it }.average().toFloat())
-                    }
+                    val rms = sqrt(segmentData.map { it * it }.average().toFloat())
+                    val amplitude = segmentData.maxOf { abs(it) }  // Always use peak amplitude
                     minAmplitude = minOf(minAmplitude, amplitude)
                     maxAmplitude = maxOf(maxAmplitude, amplitude)
+                    minRms = minOf(minRms, rms)
+                    maxRms = maxOf(maxRms, rms)
                     dataPoints.add(DataPoint(
                         id = i.toLong(),
-                        amplitude = amplitude,
+                        amplitude = amplitude,  // Peak amplitude
+                        rms = rms,             // RMS value
+                        dB = 20 * log10(amplitude.toDouble()).toFloat(),
+                        silent = amplitude < 0.01,
+                        features = null,
+                        speech = null,
                         startTime = startTimePoint,
                         endTime = endTimePoint,
                         startPosition = pointStartSample,
                         endPosition = pointEndSample,
-                        samples = pointEndSample - pointStartSample
+                        samples = segmentData.size
                     ))
                 } catch (e: Exception) {
                     Log.e(Constants.TAG, "Error processing segment $i: ${e.message}")
@@ -1127,7 +1203,7 @@ class AudioProcessor(private val filesDir: File) {
         }
         return AudioAnalysisData(
-            pointsPerSecond = pointsPerSecond,
+            segmentDurationMs = config.segmentDurationMs,
             durationMs = durationMs.toInt(),
             bitDepth = audioData.bitDepth,
             numberOfChannels = audioData.channels,
@@ -1135,7 +1211,7 @@ class AudioProcessor(private val filesDir: File) {
             samples = samplesInRange,
             dataPoints = dataPoints,
             amplitudeRange = AudioAnalysisData.AmplitudeRange(minAmplitude, maxAmplitude),
-            speakerChanges = emptyList(),
+            rmsRange = AudioAnalysisData.AmplitudeRange(minRms, maxRms),
             extractionTimeMs = extractionTimeMs.toFloat()
         )
     }
@@ -1157,32 +1233,34 @@ class AudioProcessor(private val filesDir: File) {
         return bytes.map { (it.toInt() - 128).toFloat() / 127f }.toFloatArray()
     }
-    fun loadAudioRange(
-        fileUri: String,
-        startTimeMs: Long? = null,
-        endTimeMs: Long? = null,
-        config: DecodingConfig
-    ): AudioData? {
+    fun loadAudioRange(fileUri: String, startTimeMs: Long, endTimeMs: Long, config: DecodingConfig? = null): AudioData? {
         try {
-            // Clean up the URI and get a proper File object
-            val cleanUri = fileUri.removePrefix("file://")
-            val file = File(cleanUri).takeIf { it.exists() } ?: File(filesDir, File(cleanUri).name).takeIf { it.exists() }
-                ?: run {
-                    Log.e(Constants.TAG, "File not found in any location: $cleanUri")
-                    return null
-                }
-            // Check if it's a WAV file by reading first 4 bytes
-            val isWav = FileInputStream(file).use { fis ->
-                val header = ByteArray(4)
-                fis.read(header)
-                String(header) == "RIFF"
-            }
+            // Use default config if none provided
+            val effectiveConfig = config ?: DecodingConfig(
+                targetSampleRate = null,
+                targetChannels = null,
+                targetBitDepth = 16,
+                normalizeAudio = false
+            )
-            return if (isWav) {
-                loadWavRange(file, startTimeMs, endTimeMs, config)
+            // First check if it's a WAV file by extension
+            val isWavByExtension = fileUri.lowercase().endsWith(".wav")
+            // Then verify WAV header if needed
+            val headerSize = if (isWavByExtension) {
+                getWavHeaderSize(fileUri)
+            } else null
+            // If it's a WAV file (by extension and header verification)
+            return if (isWavByExtension && headerSize != null) {
+                Log.d(Constants.TAG, "Loading WAV range with header size: $headerSize bytes")
+                loadWavRange(fileUri, startTimeMs, endTimeMs, effectiveConfig, headerSize)
             } else {
-                loadCompressedAudioRange(file, startTimeMs, endTimeMs, config)
+                if (isWavByExtension) {
+                    Log.w(Constants.TAG, "File has .wav extension but invalid header, falling back to compressed loader")
+                }
+                Log.d(Constants.TAG, "Loading compressed audio range")
+                loadCompressedAudioRange(fileUri, startTimeMs, endTimeMs, effectiveConfig)
             }
         } catch (e: Exception) {
             Log.e(Constants.TAG, "Failed to load audio range: ${e.message}", e)
@@ -1191,52 +1269,59 @@ class AudioProcessor(private val filesDir: File) {
     }
     private fun loadWavRange(
-        file: File,
-        startTimeMs: Long?,
-        endTimeMs: Long?,
-        config: DecodingConfig
+        fileUri: String,
+        startTimeMs: Long,
+        endTimeMs: Long,
+        config: DecodingConfig,
+        headerSize: Int
     ): AudioData? {
         try {
-            // Read WAV header to get format info
-            val fis = FileInputStream(file)
-            val headerBuffer = ByteArray(44)  // WAV header is 44 bytes
-            fis.read(headerBuffer)
-            // Parse WAV header
-            val sampleRate = ByteBuffer.wrap(headerBuffer, 24, 4).order(ByteOrder.LITTLE_ENDIAN).int
-            val channels = ByteBuffer.wrap(headerBuffer, 22, 2).order(ByteOrder.LITTLE_ENDIAN).short.toInt()
-            val bitDepth = ByteBuffer.wrap(headerBuffer, 34, 2).order(ByteOrder.LITTLE_ENDIAN).short.toInt()
-            // Calculate duration
-            val bytesPerFrame = channels * (bitDepth / 8)
-            val numFrames = (file.length() - 44) / bytesPerFrame  // Subtract header size
-            val durationMs = (numFrames * 1000L) / sampleRate
+            val file = File(fileUri.removePrefix("file://")).takeIf { it.exists() }
+                ?: File(filesDir, File(fileUri).name).takeIf { it.exists() }
+                ?: throw IllegalArgumentException("File not found: $fileUri")
+            // Use existing method to get audio format
+            val format = getAudioFormat(fileUri) ?: throw IllegalArgumentException("Could not determine audio format")
-            // Calculate positions
-            val startByte = 44 + ((startTimeMs ?: 0) * sampleRate * bytesPerFrame / 1000)
-            val endByte = 44 + ((endTimeMs ?: (file.length() * 1000 / (sampleRate * bytesPerFrame))) * sampleRate * bytesPerFrame / 1000)
-            val length = (endByte - startByte).toInt()
+            val bytesPerSecond = format.sampleRate * format.channels * (format.bitDepth / 8)
+            val startByteOffset = ((startTimeMs * bytesPerSecond) / 1000).toInt()
+            val endByteOffset = ((endTimeMs * bytesPerSecond) / 1000).toInt()
+            val startByte = headerSize + startByteOffset
+            val endByte = headerSize + endByteOffset
             Log.d(Constants.TAG, """
-                Loading WAV section:
-                - start: ${startTimeMs}ms (pos: $startByte)
-                - end: ${endTimeMs}ms (pos: $endByte)
-                - length: $length bytes
-                - format: ${sampleRate}Hz, $channels channels, $bitDepth-bit
+                Loading WAV range:
+                - headerSize: $headerSize
+                - startByte: $startByte
+                - endByte: $endByte
+                - bytesPerSecond: $bytesPerSecond
             """.trimIndent())
-            // Read the requested section
-            val audioData = ByteArray(length)
-            fis.skip(startByte - 44)  // Skip to start position (accounting for header we already read)
-            fis.read(audioData)
-            fis.close()
+            var audioDataBytes = ByteArray((endByte - startByte).coerceAtLeast(0))
+            FileInputStream(file).use { fis ->
+                fis.skip(startByte.toLong())
+                fis.read(audioDataBytes)
+            }
+            // Apply bit depth conversion if needed
+            var effectiveBitDepth = format.bitDepth
+            if (config.targetBitDepth != format.bitDepth) {
+                audioDataBytes = AudioFormatUtils.convertBitDepth(
+                    audioDataBytes,
+                    format.bitDepth,
+                    config.targetBitDepth
+                )
+                effectiveBitDepth = config.targetBitDepth
+                Log.d(Constants.TAG, "Converted bit depth from ${format.bitDepth} to ${config.targetBitDepth}")
+            }
             return AudioData(
-                data = audioData,
-                sampleRate = config.targetSampleRate ?: sampleRate,
-                channels = config.targetChannels ?: channels,
-                bitDepth = config.targetBitDepth ?: bitDepth,
-                durationMs = durationMs  // Pass the duration
+                data = audioDataBytes,
+                sampleRate = format.sampleRate,
+                channels = format.channels,
+                bitDepth = effectiveBitDepth,
+                durationMs = endTimeMs - startTimeMs
             )
         } catch (e: Exception) {
             Log.e(Constants.TAG, "Failed to load WAV range: ${e.message}", e)
@@ -1245,16 +1330,16 @@ class AudioProcessor(private val filesDir: File) {
     }
     private fun loadCompressedAudioRange(
-        file: File,
-        startTimeMs: Long?,
-        endTimeMs: Long?,
+        fileUri: String,
+        startTimeMs: Long,
+        endTimeMs: Long,
         config: DecodingConfig
     ): AudioData? {
         val extractor = MediaExtractor()
         var decoder: MediaCodec? = null
         try {
-            extractor.setDataSource(file.absolutePath)
+            extractor.setDataSource(fileUri.removePrefix("file://"))
             val format = extractor.getTrackFormat(0)
             extractor.selectTrack(0)
@@ -1271,8 +1356,8 @@ class AudioProcessor(private val filesDir: File) {
             Log.d("AudioProcessor", "Final duration: ${totalDurationMs}ms")
             // Calculate valid time range
-            val validStartMs = startTimeMs?.coerceIn(0, totalDurationMs) ?: 0
-            val validEndMs = endTimeMs?.coerceIn(validStartMs, totalDurationMs) ?: totalDurationMs
+            val validStartMs = startTimeMs.coerceIn(0, totalDurationMs) ?: 0
+            val validEndMs = endTimeMs.coerceIn(validStartMs, totalDurationMs) ?: totalDurationMs
             val effectiveDurationMs = validEndMs - validStartMs
             // Initialize decoder
@@ -1302,7 +1387,7 @@ class AudioProcessor(private val filesDir: File) {
                 - format: ${targetSampleRate}Hz, $targetChannels channels, $targetBitDepth-bit
             """.trimIndent())
-            val outputBuffer = ByteBuffer.allocateDirect(totalBytes.toInt())
+            val outputBuffer = ByteBuffer.allocate(totalBytes.toInt())
             val bufferInfo = MediaCodec.BufferInfo()
             var isEOS = false
@@ -1332,18 +1417,25 @@ class AudioProcessor(private val filesDir: File) {
                 // Handle output
                 val outputBufferId = decoder.dequeueOutputBuffer(bufferInfo, 10000)
                 if (outputBufferId >= 0) {
-                    val outputBuffer = decoder.getOutputBuffer(outputBufferId)!!
+                    val decodedBuffer = decoder.getOutputBuffer(outputBufferId)!!
                     if (bufferInfo.size > 0) {
-                        outputBuffer.limit(bufferInfo.offset + bufferInfo.size)
-                        outputBuffer.position(bufferInfo.offset)
-                        if (outputBuffer.remaining() <= totalBytes - outputBuffer.position()) {
-                            outputBuffer.get(ByteArray(outputBuffer.remaining()))
-                        }
+                        // Set buffer position and limit based on the decoded data
+                        decodedBuffer.position(bufferInfo.offset)
+                        decodedBuffer.limit(bufferInfo.offset + bufferInfo.size)
+                        // Copy decoded data to our output buffer
+                        outputBuffer.put(decodedBuffer)
                     }
                     decoder.releaseOutputBuffer(outputBufferId, false)
+                    // Check if we've reached the end
+                    if ((bufferInfo.flags and MediaCodec.BUFFER_FLAG_END_OF_STREAM) != 0) {
+                        isEOS = true
+                    }
                 }
             }
+            // Prepare the final byte array
             outputBuffer.flip()
             val audioData = ByteArray(outputBuffer.remaining())
             outputBuffer.get(audioData)
@@ -1353,7 +1445,7 @@ class AudioProcessor(private val filesDir: File) {
                 sampleRate = targetSampleRate,
                 channels = targetChannels,
                 bitDepth = targetBitDepth,
-                durationMs = effectiveDurationMs  // Pass the duration
+                durationMs = endTimeMs - startTimeMs  // Use the actual time range
             ).also {
                 Log.d(Constants.TAG, "Loaded compressed audio with duration: ${effectiveDurationMs}ms")
             }
@@ -1483,4 +1575,362 @@ class AudioProcessor(private val filesDir: File) {
         // This will help ensure consistent format when joining sections
         return audioData
     }
+    /**
+     * Computes one [Features] summary for the whole file by treating all decoded samples as a single segment.
+     *
+     * Only the basic statistics are passed in (sum of squares, zero crossings, min/max amplitude);
+     * an empty option map is handed to computeFeatures.
+     *
+     * @param audioData Decoded audio whose bytes are converted to floats according to its bit depth.
+     * @return The value returned by computeFeatures for the single whole-file segment.
+     */
+    fun processEntireFile(audioData: AudioData): Features {
+        val pcm = convertToFloatArray(audioData.data, audioData.bitDepth)
+        // Accumulate the energy in Double and narrow to Float once at the end.
+        var energy = 0.0
+        for (sample in pcm) {
+            energy += sample * sample.toDouble()
+        }
+        return computeFeatures(
+            segmentData = pcm,
+            sampleRate = audioData.sampleRate.toFloat(),
+            sumSquares = energy.toFloat(),
+            zeroCrossings = countZeroCrossings(pcm),
+            segmentLength = pcm.size,
+            minAmplitude = pcm.minOrNull() ?: 0f,
+            maxAmplitude = pcm.maxOrNull() ?: 0f,
+            featureOptions = emptyMap() // Dont compute complex features
+        )
+    }
+    /**
+     * Counts adjacent sample pairs whose product is strictly negative.
+     * Pairs that include an exact zero are not counted.
+     */
+    private fun countZeroCrossings(data: FloatArray): Int =
+        (1 until data.size).count { idx -> data[idx - 1] * data[idx] < 0 }
+    /** Converts a frequency in Hz to the mel scale using mel = 2595 * log10(1 + hz / 700). */
+    private fun hzToMel(hz: Float): Float = 2595f * log10(1f + hz / 700f)
+    /** Converts a mel-scale value back to Hz using hz = 700 * (10^(mel / 2595) - 1). */
+    private fun melToHz(mel: Float): Float = 700f * (10f.pow(mel / 2595f) - 1f)
+    /**
+     * Applies a symmetric Hann window, w[i] = 0.5 * (1 - cos(2 * pi * i / (N - 1))), to the samples.
+     *
+     * @param samples Input samples; the array is not modified.
+     * @return A new array of the same length holding the windowed samples. Inputs with fewer than
+     *         two samples are returned as an unmodified copy.
+     */
+    private fun applyHannWindow(samples: FloatArray): FloatArray {
+        // With N == 1 the phase term is 0 / 0 = NaN, which would poison every later FFT stage.
+        // A window of length 0 or 1 has nothing to taper, so pass the data through unchanged.
+        if (samples.size < 2) return samples.copyOf()
+        val output = FloatArray(samples.size)
+        for (i in samples.indices) {
+            val multiplier = 0.5f * (1f - cos(2f * PI.toFloat() * i / (samples.size - 1)))
+            output[i] = samples[i] * multiplier
+        }
+        return output
+    }
+    /**
+     * Computes log mel-band energies for one block of samples.
+     *
+     * The power spectrum from prepareFFT is weighted by each of 128 mel filters and the natural log
+     * of every band energy is returned; energies are floored at 1e-10 before the log.
+     */
+    private fun computeMelSpectrogram(samples: FloatArray, sampleRate: Float): List<Float> {
+        val spectrum = prepareFFT(samples, sampleRate).first
+        val filterbank = computeMelFilterbank(
+            numFilters = 128,
+            powerSpectrumSize = spectrum.size,
+            sampleRate = sampleRate
+        )
+        // One log-energy value per mel filter, accumulated bin by bin in ascending order
+        return filterbank.map { weights ->
+            val bandEnergy = spectrum.indices.fold(0f) { acc, bin -> acc + spectrum[bin] * weights[bin] }
+            kotlin.math.ln(maxOf(bandEnergy, 1e-10f))
+        }
+    }
+    /**
+     * Computes a chroma vector by folding the FFT magnitude spectrum onto the 12 pitch classes.
+     *
+     * Each non-DC bin is assigned to the nearest equal-tempered semitone relative to 440 Hz and its
+     * magnitude is added to that semitone's pitch class, so index 0 is the pitch class of 440 Hz.
+     *
+     * @param samples Input audio samples.
+     * @param sampleRate Sampling rate in Hz.
+     * @return N_CHROMA accumulated magnitudes, one per pitch class.
+     */
+    private fun computeChroma(samples: FloatArray, sampleRate: Float): List<Float> {
+        val (_, magnitudeSpectrum) = prepareFFT(samples, sampleRate)
+        val chroma = FloatArray(N_CHROMA) { 0f }
+        // prepareFFT returns one magnitude per bin (fftLength / 2 + 1 values), not interleaved re/im,
+        // so recover the FFT length it actually used instead of assuming N_FFT.
+        val fftLength = (magnitudeSpectrum.size - 1) * 2
+        if (fftLength <= 0) return chroma.toList()
+        val hzPerBin = sampleRate / fftLength
+        // Bin 0 is DC and has no pitch, so start at bin 1.
+        for (bin in 1 until magnitudeSpectrum.size) {
+            val freq = bin * hzPerBin
+            if (!(freq > 0f)) continue // also rejects NaN
+            val semitonesFrom440 = Math.round(12.0 * log2(freq / 440.0)).toInt()
+            // Kotlin's % keeps the sign of the dividend; normalise so notes below 440 Hz are kept.
+            val pitchClass = ((semitonesFrom440 % 12) + 12) % 12
+            if (pitchClass < chroma.size) {
+                chroma[pitchClass] += magnitudeSpectrum[bin]
+            }
+        }
+        return chroma.toList()
+    }
+    /**
+     * Placeholder for spectral contrast: not implemented yet and always returns an empty list.
+     *
+     * @param samples Input audio samples (currently unused).
+     * @param sampleRate Sampling rate in Hz (currently unused).
+     * @return An empty list.
+     */
+    @Suppress("UNUSED_PARAMETER")
+    private fun computeSpectralContrast(samples: FloatArray, sampleRate: Float): List<Float> {
+        // TODO: implement spectral contrast on top of the magnitude spectrum from prepareFFT.
+        // Until then, skip the FFT entirely instead of computing a spectrum that is thrown away.
+        return emptyList() // Placeholder
+    }
+    /**
+     * Projects the chroma vector onto six tonal axes.
+     *
+     * Each output value is the sum of three chroma bins selected by one row of a fixed 6 x 12
+     * 0/1 matrix: row r selects pitch classes r, r + 4 and r + 7 (mod 12).
+     *
+     * @param samples Input audio samples.
+     * @param sampleRate Sampling rate in Hz.
+     * @return Six projection values, one per matrix row.
+     */
+    private fun computeTonnetz(samples: FloatArray, sampleRate: Float): List<Float> {
+        // First compute chroma features
+        val chroma = computeChroma(samples, sampleRate)
+        // Projection matrix (6 rows x 12 pitch classes). Every row must have exactly 12 entries:
+        // weights placed past index 11 have no matching chroma bin and are silently lost.
+        // NOTE(review): row 4's third weight is placed at index 11, inferred from the
+        // r, r + 4, r + 7 pattern of the other rows - confirm against the intended definition.
+        val tonnetzMatrix = arrayOf(
+            floatArrayOf(1f, 0f, 0f, 0f, 1f, 0f, 0f, 1f, 0f, 0f, 0f, 0f), // pitch classes 0, 4, 7
+            floatArrayOf(0f, 1f, 0f, 0f, 0f, 1f, 0f, 0f, 1f, 0f, 0f, 0f), // pitch classes 1, 5, 8
+            floatArrayOf(0f, 0f, 1f, 0f, 0f, 0f, 1f, 0f, 0f, 1f, 0f, 0f), // pitch classes 2, 6, 9
+            floatArrayOf(0f, 0f, 0f, 1f, 0f, 0f, 0f, 1f, 0f, 0f, 1f, 0f), // pitch classes 3, 7, 10
+            floatArrayOf(0f, 0f, 0f, 0f, 1f, 0f, 0f, 0f, 1f, 0f, 0f, 1f), // pitch classes 4, 8, 11
+            floatArrayOf(1f, 0f, 0f, 0f, 0f, 1f, 0f, 0f, 0f, 1f, 0f, 0f)  // pitch classes 5, 9, 0
+        )
+        // One weighted sum per row; missing chroma bins count as zero
+        return tonnetzMatrix.map { row ->
+            var projection = 0f
+            for (i in row.indices) {
+                projection += row[i] * (chroma.getOrNull(i) ?: 0f)
+            }
+            projection
+        }
+    }
+    /**
+     * Returns the smallest power of two that is greater than or equal to [n].
+     *
+     * @param n Requested minimum size; values of 1 or less yield 1.
+     * @return The smallest power of two >= n.
+     * @throws IllegalArgumentException if n exceeds 2^30, the largest power of two an Int can hold.
+     */
+    private fun nextPowerOfTwo(n: Int): Int {
+        if (n <= 1) return 1
+        // Doubling past 2^30 overflows Int (to a negative value, then 0), which used to spin forever.
+        require(n <= (1 shl 30)) { "No power of two >= $n fits in an Int" }
+        return Integer.highestOneBit(n - 1) shl 1
+    }
+    /**
+     * Estimates the fundamental frequency of a segment from its autocorrelation.
+     *
+     * The segment is Hann-windowed, zero-padded to a power of two of at least twice its length,
+     * transformed with a forward FFT, converted to a power spectrum and transformed back to obtain
+     * the autocorrelation. The strongest local peak above a 0.3 threshold within the lag range for
+     * 50-500 Hz determines the result.
+     *
+     * @param segment Audio samples to analyse.
+     * @param sampleRate Sampling rate in Hz.
+     * @return sampleRate / lag of the selected peak, or 0.0f when the segment has fewer than two
+     *         samples, an FFT step throws, or no qualifying peak is found.
+     */
+    private fun estimatePitch(segment: FloatArray, sampleRate: Float): Float {
+        if (segment.size < 2) return 0.0f
+        // Apply Hann window
+        val windowed = applyHannWindow(segment)
+        // Pad for FFT - ensure length is power of 2 and sufficient for autocorrelation
+        val fftLength = nextPowerOfTwo(segment.size * 2)
+        val padded = FloatArray(fftLength) // Initialize with zeros
+        windowed.copyInto(padded) // Copy windowed data into padded array
+        // Perform forward FFT
+        val fft = FFT(fftLength)
+        try {
+            fft.realForward(padded)
+        } catch (e: Exception) {
+            Log.e("AudioProcessor", "FFT forward transform failed: ${e.message}")
+            return 0.0f
+        }
+        // Compute power spectrum
+        val powerSpectrum = FloatArray(fftLength)
+        try {
+            // Handle DC and Nyquist components separately
+            // NOTE(review): assumes FFT.realForward packs the DC real part into index 0 and the
+            // Nyquist real part into index 1 - confirm against FFT.kt
+            powerSpectrum[0] = padded[0] * padded[0]
+            powerSpectrum[fftLength/2] = padded[1] * padded[1]
+            // Handle remaining frequencies
+            for (i in 1 until fftLength/2) {
+                val re = padded[2 * i]
+                val im = padded[2 * i + 1]
+                powerSpectrum[i] = re * re + im * im
+                powerSpectrum[fftLength - i] = powerSpectrum[i] // Mirror for inverse FFT
+            }
+        } catch (e: Exception) {
+            Log.e("AudioProcessor", "Power spectrum computation failed: ${e.message}")
+            return 0.0f
+        }
+        // Inverse FFT to get autocorrelation
+        // NOTE(review): presumably realInverse reads the first array and writes the second - verify in FFT.kt
+        val autocorrelation = FloatArray(fftLength)
+        try {
+            fft.realInverse(powerSpectrum, autocorrelation)
+        } catch (e: Exception) {
+            Log.e("AudioProcessor", "FFT inverse transform failed: ${e.message}")
+            return 0.0f
+        }
+        // Normalize autocorrelation
+        // NOTE(review): if autocorrelation[0] is 0 (e.g. an all-zero segment) normFactor is infinite;
+        // confirm the resulting non-finite values are acceptable for the peak search below
+        val normFactor = 1.0f / autocorrelation[0] // Normalize by zero-lag autocorrelation
+        for (i in autocorrelation.indices) {
+            autocorrelation[i] *= normFactor
+        }
+        // Find the first peak within pitch range (50-500 Hz)
+        // Lag is measured in samples: a 500 Hz tone repeats every sampleRate / 500 samples
+        val minLag = (sampleRate / 500.0f).toInt().coerceAtLeast(1)
+        val maxLag = (sampleRate / 50.0f).toInt().coerceAtMost(autocorrelation.size - 1)
+        var maxCorr = -1.0f
+        var pitchLag = 0
+        // Add peak picking criteria
+        val threshold = 0.3f // Correlation threshold
+        var isPeak = false
+        for (lag in minLag..maxLag) {
+            // Both neighbours are read below, so the last index is excluded
+            if (lag > 0 && lag < autocorrelation.size - 1) {
+                // Check if this point is a peak
+                isPeak = autocorrelation[lag] > autocorrelation[lag - 1] &&
+                        autocorrelation[lag] > autocorrelation[lag + 1] &&
+                        autocorrelation[lag] > threshold
+                // Keep the highest qualifying peak, not merely the first one encountered
+                if (isPeak && autocorrelation[lag] > maxCorr) {
+                    maxCorr = autocorrelation[lag]
+                    pitchLag = lag
+                }
+            }
+        }
+        return if (pitchLag > 0) sampleRate / pitchLag else 0.0f
+    }
+    /**
+     * Prepares FFT by applying Hann window, padding, and computing both power and magnitude spectra.
+     *
+     * Both returned arrays hold one value per frequency bin, from DC (index 0) up to and including
+     * Nyquist (index fftLength / 2), i.e. fftLength / 2 + 1 values each.
+     *
+     * @param samples Input audio samples; truncated or zero-padded to fftLength after windowing
+     * @param sampleRate Sampling rate in Hz (not used by the computation itself)
+     * @param fftLength FFT size (must be power of 2)
+     * @return Pair of power spectrum and magnitude spectrum
+     */
+    private fun prepareFFT(samples: FloatArray, sampleRate: Float, fftLength: Int = nextPowerOfTwo(samples.size.coerceAtLeast(2048))): Pair<FloatArray, FloatArray> {
+        val windowed = applyHannWindow(samples)
+        val padded = windowed.copyOf(fftLength)
+        val fft = FFT(fftLength)
+        fft.realForward(padded)
+        val nyquistBin = fftLength / 2
+        val magnitudeSpectrum = FloatArray(nyquistBin + 1)
+        // In the packed output, index 0 is the DC real part and index 1 is the Nyquist real part.
+        // Both bins are purely real, so index 1 must not be mixed into bin 0 as an imaginary part.
+        magnitudeSpectrum[0] = abs(padded[0])
+        if (nyquistBin > 0) {
+            magnitudeSpectrum[nyquistBin] = abs(padded[1])
+        }
+        for (i in 1 until nyquistBin) {
+            val re = padded[2 * i]
+            val im = padded[2 * i + 1]
+            magnitudeSpectrum[i] = sqrt(re * re + im * im)
+        }
+        // Build the power spectrum directly as a FloatArray to avoid boxing every bin
+        val powerSpectrum = FloatArray(magnitudeSpectrum.size) { bin -> magnitudeSpectrum[bin] * magnitudeSpectrum[bin] }
+        return Pair(powerSpectrum, magnitudeSpectrum)
+    }
+    /**
+     * Basic format description of an audio file, as returned by [getAudioFormat].
+     *
+     * @property sampleRate Sample rate in Hz.
+     * @property channels Number of audio channels.
+     * @property bitDepth Bits per sample.
+     */
+    data class AudioFormat(
+        val sampleRate: Int,
+        val channels: Int,
+        val bitDepth: Int
+    )
+    /**
+     * Reads the sample rate and channel count of an audio file via MediaExtractor.
+     *
+     * The path is tried as given (after stripping a leading "file://") and then, by file name only,
+     * inside filesDir. The first track whose MIME type starts with "audio/" is used, so containers
+     * whose first track is not audio are still handled.
+     *
+     * @param fileUri Path or file:// URI of the audio file.
+     * @return The detected format with bitDepth fixed at 16, or null if the file is missing,
+     *         has no audio track, or cannot be parsed.
+     */
+    fun getAudioFormat(fileUri: String): AudioFormat? {
+        val cleanUri = fileUri.removePrefix("file://")
+        val file = File(cleanUri).takeIf { it.exists() } ?: File(filesDir, File(cleanUri).name).takeIf { it.exists() }
+            ?: run {
+                Log.e(Constants.TAG, "File not found: $cleanUri")
+                return null
+            }
+        val extractor = MediaExtractor()
+        try {
+            extractor.setDataSource(file.absolutePath)
+            // Track 0 is not guaranteed to be audio, so pick the first audio track explicitly
+            val format = (0 until extractor.trackCount)
+                .asSequence()
+                .map { index -> extractor.getTrackFormat(index) }
+                .firstOrNull { it.getString(MediaFormat.KEY_MIME)?.startsWith("audio/") == true }
+                ?: run {
+                    Log.e(Constants.TAG, "No audio track found in: ${file.absolutePath}")
+                    return null
+                }
+            return AudioFormat(
+                sampleRate = format.getInteger(MediaFormat.KEY_SAMPLE_RATE),
+                channels = format.getInteger(MediaFormat.KEY_CHANNEL_COUNT),
+                bitDepth = 16  // Most compressed formats decode to 16-bit PCM
+            )
+        } catch (e: Exception) {
+            Log.e(Constants.TAG, "Failed to get audio format: ${e.message}")
+            return null
+        } finally {
+            extractor.release()
+        }
+    }
+    /**
+     * Gets the size of the audio file header.
+     * For WAV files, this includes the RIFF header and all metadata chunks before the data chunk,
+     * plus the 8-byte header of the data chunk itself, i.e. the offset of the first audio byte.
+     * For other formats, this will return null as header size handling is format-specific.
+     *
+     * @param fileUri The URI of the audio file to analyze
+     * @return The size of the header in bytes, or null if:
+     *         - The file is not a WAV file
+     *         - The file cannot be read
+     *         - The file format is invalid
+     *         - The data chunk cannot be found
+     *
+     * WAV File Structure:
+     * - RIFF header (12 bytes)
+     *   - "RIFF" identifier (4 bytes)
+     *   - File size (4 bytes)
+     *   - "WAVE" identifier (4 bytes)
+     * - Format chunk ("fmt ") (24 bytes typically)
+     * - Optional metadata chunks (variable size)
+     *   - LIST (metadata like artist, title)
+     *   - JUNK (padding)
+     *   - fact (additional format info)
+     *   - cue  (cue points)
+     * - Data chunk
+     *   - "data" identifier (4 bytes)
+     *   - Chunk size (4 bytes)
+     *   - Actual audio data
+     *
+     * Chunks with an odd size are followed by one pad byte that is not included in the chunk size.
+     */
+    fun getWavHeaderSize(fileUri: String): Int? {
+        val cleanUri = fileUri.removePrefix("file://")
+        val file = File(cleanUri).takeIf { it.exists() } ?: File(filesDir, File(cleanUri).name).takeIf { it.exists() }
+            ?: run {
+                Log.e(Constants.TAG, "File not found: $cleanUri")
+                return null
+            }
+        // read() may deliver fewer bytes than requested; loop until `length` bytes arrive or EOF
+        fun readFully(input: FileInputStream, target: ByteArray, length: Int): Boolean {
+            var filled = 0
+            while (filled < length) {
+                val count = input.read(target, filled, length - filled)
+                if (count < 0) return false
+                filled += count
+            }
+            return true
+        }
+        // skip() may skip fewer bytes than requested; fall back to single reads to detect EOF
+        fun skipFully(input: FileInputStream, byteCount: Long): Boolean {
+            var remaining = byteCount
+            while (remaining > 0) {
+                val skipped = input.skip(remaining)
+                if (skipped > 0) {
+                    remaining -= skipped
+                } else if (input.read() < 0) {
+                    return false
+                } else {
+                    remaining--
+                }
+            }
+            return true
+        }
+        return try {
+            // use {} closes the stream on every exit path, including early returns and exceptions
+            FileInputStream(file).use { input ->
+                val buffer = ByteArray(12)  // Read RIFF header and chunk size
+                // Read RIFF header
+                if (!readFully(input, buffer, 12)) {
+                    Log.e(Constants.TAG, "Failed to read RIFF header")
+                    return null
+                }
+                // Verify RIFF header
+                if (String(buffer, 0, 4, Charsets.US_ASCII) != "RIFF" || String(buffer, 8, 4, Charsets.US_ASCII) != "WAVE") {
+                    Log.e(Constants.TAG, "Invalid WAV file format")
+                    return null
+                }
+                var headerSize = 12L
+                // Read chunks until we find the data chunk
+                while (true) {
+                    if (!readFully(input, buffer, 8)) {
+                        // No data chunk was found, so there is no meaningful header size to report
+                        Log.e(Constants.TAG, "Unexpected end of file while reading chunks")
+                        return null
+                    }
+                    // Chunk size is an unsigned 32-bit little-endian value; keep it in a Long
+                    val chunkSize = ((buffer[7].toLong() and 0xFFL) shl 24) or
+                        ((buffer[6].toLong() and 0xFFL) shl 16) or
+                        ((buffer[5].toLong() and 0xFFL) shl 8) or
+                        (buffer[4].toLong() and 0xFFL)
+                    val chunkId = String(buffer, 0, 4, Charsets.US_ASCII)
+                    Log.d(Constants.TAG, "Found chunk: $chunkId, size: $chunkSize")
+                    if (chunkId == "data") {
+                        headerSize += 8  // Add chunk header size
+                        Log.d(Constants.TAG, "Found data chunk at offset: $headerSize")
+                        break
+                    }
+                    // RIFF chunks are word-aligned: an odd-sized chunk is followed by one pad byte
+                    val paddedSize = chunkSize + (chunkSize and 1L)
+                    headerSize += 8 + paddedSize  // Add chunk header and data size
+                    if (headerSize > Int.MAX_VALUE || !skipFully(input, paddedSize)) {
+                        Log.e(Constants.TAG, "Unexpected end of file while reading chunks")
+                        return null
+                    }
+                }
+                Log.d(Constants.TAG, "Total WAV header size: $headerSize bytes")
+                headerSize.toInt()
+            }
+        } catch (e: Exception) {
+            Log.e(Constants.TAG, "Error calculating WAV header size: ${e.message}")
+            null
+        }
+    }
 }