npm - @siteed/expo-audio-stream - Versions diffs - 1.17.0 → 2.0.0 - Mend

@siteed/expo-audio-stream 1.17.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (72) hide show

package/CHANGELOG.md +21 -1
package/README.md +1 -1
package/android/src/main/java/net/siteed/audiostream/AudioAnalysisData.kt +68 -22
package/android/src/main/java/net/siteed/audiostream/AudioFormatUtils.kt +24 -0
package/android/src/main/java/net/siteed/audiostream/AudioProcessor.kt +836 -386
package/android/src/main/java/net/siteed/audiostream/AudioRecorderManager.kt +0 -2
package/android/src/main/java/net/siteed/audiostream/AudioRecordingService.kt +35 -29
package/android/src/main/java/net/siteed/audiostream/ExpoAudioStreamModule.kt +236 -96
package/android/src/main/java/net/siteed/audiostream/FFT.kt +55 -0
package/android/src/main/java/net/siteed/audiostream/Features.kt +49 -7
package/android/src/main/java/net/siteed/audiostream/RecordingConfig.kt +2 -4
package/build/AudioAnalysis/AudioAnalysis.types.d.ts +55 -47
package/build/AudioAnalysis/AudioAnalysis.types.d.ts.map +1 -1
package/build/AudioAnalysis/AudioAnalysis.types.js.map +1 -1
package/build/AudioAnalysis/extractAudioAnalysis.d.ts +60 -13
package/build/AudioAnalysis/extractAudioAnalysis.d.ts.map +1 -1
package/build/AudioAnalysis/extractAudioAnalysis.js +147 -162
package/build/AudioAnalysis/extractAudioAnalysis.js.map +1 -1
package/build/ExpoAudioStream.types.d.ts +47 -3
package/build/ExpoAudioStream.types.d.ts.map +1 -1
package/build/ExpoAudioStream.types.js.map +1 -1
package/build/ExpoAudioStream.web.d.ts.map +1 -1
package/build/ExpoAudioStream.web.js +0 -1
package/build/ExpoAudioStream.web.js.map +1 -1
package/build/ExpoAudioStreamModule.d.ts.map +1 -1
package/build/ExpoAudioStreamModule.js +216 -12
package/build/ExpoAudioStreamModule.js.map +1 -1
package/build/WebRecorder.web.d.ts +67 -13
package/build/WebRecorder.web.d.ts.map +1 -1
package/build/WebRecorder.web.js +177 -173
package/build/WebRecorder.web.js.map +1 -1
package/build/index.d.ts +3 -3
package/build/index.d.ts.map +1 -1
package/build/index.js +2 -2
package/build/index.js.map +1 -1
package/build/useAudioRecorder.d.ts.map +1 -1
package/build/useAudioRecorder.js +12 -8
package/build/useAudioRecorder.js.map +1 -1
package/build/utils/audioProcessing.d.ts +24 -0
package/build/utils/audioProcessing.d.ts.map +1 -0
package/build/utils/audioProcessing.js +133 -0
package/build/utils/audioProcessing.js.map +1 -0
package/build/workers/InlineFeaturesExtractor.web.d.ts +1 -1
package/build/workers/InlineFeaturesExtractor.web.d.ts.map +1 -1
package/build/workers/InlineFeaturesExtractor.web.js +694 -194
package/build/workers/InlineFeaturesExtractor.web.js.map +1 -1
package/build/workers/inlineAudioWebWorker.web.d.ts +1 -1
package/build/workers/inlineAudioWebWorker.web.d.ts.map +1 -1
package/build/workers/inlineAudioWebWorker.web.js +3 -2
package/build/workers/inlineAudioWebWorker.web.js.map +1 -1
package/ios/AudioAnalysisData.swift +51 -16
package/ios/AudioProcessingHelpers.swift +710 -26
package/ios/AudioProcessor.swift +334 -185
package/ios/AudioStreamManager.swift +2 -3
package/ios/DataPoint.swift +25 -12
package/ios/DecodingConfig.swift +47 -0
package/ios/ExpoAudioStreamModule.swift +187 -103
package/ios/FFT.swift +62 -0
package/ios/Features.swift +24 -3
package/ios/RecordingSettings.swift +7 -7
package/package.json +2 -1
package/src/AudioAnalysis/AudioAnalysis.types.ts +68 -52
package/src/AudioAnalysis/extractAudioAnalysis.ts +223 -219
package/src/ExpoAudioStream.types.ts +53 -7
package/src/ExpoAudioStream.web.ts +0 -1
package/src/ExpoAudioStreamModule.ts +255 -10
package/src/WebRecorder.web.ts +231 -244
package/src/index.ts +5 -3
package/src/useAudioRecorder.tsx +14 -10
package/src/utils/audioProcessing.ts +205 -0
package/src/workers/InlineFeaturesExtractor.web.tsx +694 -194
package/src/workers/inlineAudioWebWorker.web.tsx +3 -2

package/ios/AudioProcessor.swift CHANGED Viewed

@@ -1,4 +1,4 @@
-// AudioProcessor.swift
+// packages/expo-audio-stream/ios/AudioProcessor.swift
 import Foundation
 import Accelerate
@@ -67,14 +67,24 @@ public class AudioProcessor {
     ///   - numberOfSamples: The number of samples to extract (for waveform).
     ///   - offset: The offset to start reading from (in samples).
     ///   - length: The length of the audio to read (in samples).
-    ///   - pointsPerSecond: The number of data points to extract per second (for features).
-    ///   - algorithm: The algorithm to use for feature extraction.
+    ///   - segmentDurationMs: The duration of each segment in milliseconds.
     ///   - featureOptions: The features to extract.
     ///   - bitDepth: The bit depth of the audio data.
     ///   - numberOfChannels: The number of channels in the audio data.
+    ///   - position: The position to start reading from (in bytes).
+    ///   - byteLength: The length of the audio to read (in bytes).
     /// - Returns: An `AudioAnalysisData` object containing the extracted features.
-    public func processAudioData(numberOfSamples: Int?, offset: Int? = 0, length: UInt? = nil, pointsPerSecond: Int?, algorithm: String, featureOptions: [String: Bool], bitDepth: Int, numberOfChannels: Int) -> AudioAnalysisData? {
+    public func processAudioData(
+        numberOfSamples: Int?,
+        offset: Int? = 0,
+        length: UInt? = nil,
+        segmentDurationMs: Int = 100, // Default 100ms
+        featureOptions: [String: Bool],
+        bitDepth: Int,
+        numberOfChannels: Int,
+        position: Int? = nil,
+        byteLength: Int? = nil
+    ) -> AudioAnalysisData? {
         guard let audioFile = audioFile else {
             reject("FILE_NOT_INITIALIZED", "Audio file is not initialized.")
             return nil
@@ -84,16 +94,69 @@ public class AudioProcessor {
         var framesPerBuffer: AVAudioFrameCount
         let actualPointsPerSecond: Int
+        NSLog("""
+            [AudioProcessor] Starting audio processing:
+            - totalFrameCount: \(totalFrameCount)
+            - bitDepth: \(bitDepth)
+            - numberOfChannels: \(numberOfChannels)
+            - position: \(position ?? -1)
+            - byteLength: \(byteLength ?? -1)
+            - offset: \(offset ?? -1)
+            - length: \(length ?? 0)
+        """)
+        // Use position/byteLength if provided, otherwise fall back to offset/length
+        let effectiveOffset: Int64 = if let position = position {
+            Int64(position / (bitDepth / 8) / numberOfChannels)
+        } else {
+            Int64(offset ?? 0)
+        }
+        let effectiveLength: Int64 = if let byteLength = byteLength {
+            Int64(byteLength / (bitDepth / 8) / numberOfChannels)
+        } else if let length = length {
+            Int64(length)
+        } else {
+            Int64(totalFrameCount) - effectiveOffset
+        }
+        NSLog("""
+            [AudioProcessor] Calculated frame positions:
+            - effectiveOffset: \(effectiveOffset)
+            - effectiveLength: \(effectiveLength)
+            - expectedEndFrame: \(effectiveOffset + effectiveLength)
+            - totalFrameCount: \(totalFrameCount)
+        """)
+        // Validate frame boundaries
+        if effectiveOffset < 0 || effectiveOffset >= Int64(totalFrameCount) {
+            NSLog("[AudioProcessor] ERROR: Invalid offset value")
+            reject("INVALID_OFFSET", "Offset value (\(effectiveOffset)) is outside valid range [0, \(totalFrameCount)]")
+            return nil
+        }
+        if effectiveLength <= 0 {
+            NSLog("[AudioProcessor] ERROR: Invalid length value")
+            reject("INVALID_LENGTH", "Length value (\(effectiveLength)) must be positive")
+            return nil
+        }
+        if effectiveOffset + effectiveLength > Int64(totalFrameCount) {
+            NSLog("[AudioProcessor] ERROR: Requested range exceeds file length")
+            reject("INVALID_RANGE", "Requested range [\(effectiveOffset), \(effectiveOffset + effectiveLength)] exceeds file length \(totalFrameCount)")
+            return nil
+        }
+        var startFrame: AVAudioFramePosition = effectiveOffset
+        let endFrame: AVAudioFramePosition = effectiveOffset + effectiveLength
+        // Calculate frames per segment based on segment duration
+        let framesPerSegment = AVAudioFrameCount(Float(audioFile.fileFormat.sampleRate) * Float(segmentDurationMs) / 1000.0)
         if let numberOfSamples = numberOfSamples {
-            framesPerBuffer = totalFrameCount / AVAudioFrameCount(numberOfSamples)
-            actualPointsPerSecond = Int(Double(totalFrameCount) / audioFile.fileFormat.sampleRate)
-        } else if let pointsPerSecond = pointsPerSecond {
-            actualPointsPerSecond = pointsPerSecond
-            framesPerBuffer = totalFrameCount / AVAudioFrameCount(actualPointsPerSecond)
+            framesPerBuffer = AVAudioFrameCount(max(1, effectiveLength / Int64(numberOfSamples)))
         } else {
-            // Default behavior: set pointsPerSecond to 1000
-            actualPointsPerSecond = 1000
-            framesPerBuffer = totalFrameCount / AVAudioFrameCount(actualPointsPerSecond)
+            framesPerBuffer = framesPerSegment
         }
         guard let buffer = AVAudioPCMBuffer(pcmFormat: audioFile.processingFormat, frameCapacity: framesPerBuffer) else {
@@ -104,11 +167,15 @@ public class AudioProcessor {
         channelCount = Int(audioFile.processingFormat.channelCount)
         var data = Array(repeating: [Float](repeating: 0, count: Int(framesPerBuffer)), count: channelCount)
-        var startFrame: AVAudioFramePosition = offset == nil ? audioFile.framePosition : Int64(offset! * Int(framesPerBuffer))
-        var endFrame: AVAudioFramePosition = length == nil ? audioFile.length : min(audioFile.length, startFrame + Int64(length!))
         var channelData = [Float]()
         while startFrame < endFrame {
+            let remainingFrames = endFrame - startFrame
+            let currentFramesPerBuffer = min(AVAudioFrameCount(framesPerBuffer), AVAudioFrameCount(remainingFrames))
+            if currentFramesPerBuffer <= 0 {
+                break
+            }
             if abortExtraction {
                 audioFile.framePosition = startFrame
                 abortExtraction = false
@@ -117,7 +184,7 @@ public class AudioProcessor {
             do {
                 audioFile.framePosition = startFrame
-                try audioFile.read(into: buffer, frameCount: framesPerBuffer)
+                try audioFile.read(into: buffer, frameCount: currentFramesPerBuffer)
             } catch {
                 reject("AUDIO_READ_ERROR", "Couldn't read into buffer: \(error.localizedDescription)")
                 return nil
@@ -132,26 +199,42 @@ public class AudioProcessor {
                 channelData.append(floatData[0][frame])
             }
-            startFrame += AVAudioFramePosition(framesPerBuffer)
-            if startFrame + AVAudioFramePosition(framesPerBuffer) > endFrame {
-                framesPerBuffer = AVAudioFrameCount(endFrame - startFrame)
-            }
+            startFrame += AVAudioFramePosition(currentFramesPerBuffer)
         }
-        return processChannelData(channelData: channelData, sampleRate: Float(audioFile.fileFormat.sampleRate), pointsPerSecond: actualPointsPerSecond, algorithm: algorithm, featureOptions: featureOptions, bitDepth: bitDepth, numberOfChannels: numberOfChannels)
+        NSLog("""
+            [AudioProcessor] Audio processing completed:
+            - processedFrames: \(endFrame - startFrame)
+            - framesPerBuffer: \(framesPerBuffer)
+        """)
+        return processChannelData(
+            channelData: channelData,
+            sampleRate: Float(audioFile.fileFormat.sampleRate),
+            segmentDurationMs: segmentDurationMs,
+            featureOptions: featureOptions,
+            bitDepth: bitDepth,
+            numberOfChannels: numberOfChannels
+        )
     }
     /// Processes audio data from a buffer.
     /// - Parameters:
     ///   - data: The audio data buffer.
     ///   - sampleRate: The sample rate of the audio data.
-    ///   - pointsPerSecond: The number of data points to extract per second (for features).
-    ///   - algorithm: The algorithm to use for feature extraction.
+    ///   - segmentDurationMs: The duration of each segment in milliseconds.
     ///   - featureOptions: The features to extract.
     ///   - bitDepth: The bit depth of the audio data.
     ///   - numberOfChannels: The number of channels in the audio data.
     /// - Returns: An `AudioAnalysisData` object containing the extracted features.
-    public func processAudioBuffer(data: Data, sampleRate: Float, pointsPerSecond: Int, algorithm: String, featureOptions: [String: Bool], bitDepth: Int, numberOfChannels: Int) -> AudioAnalysisData? {
+    public func processAudioBuffer(
+        data: Data,
+        sampleRate: Float,
+        segmentDurationMs: Int,
+        featureOptions: [String: Bool],
+        bitDepth: Int,
+        numberOfChannels: Int
+    ) -> AudioAnalysisData? {
         guard !data.isEmpty else {
             Logger.debug("Data is empty, rejecting")
             reject("DATA_EMPTY", "The audio data is empty.")
@@ -177,121 +260,154 @@ public class AudioProcessor {
             return nil
         }
-        return processChannelData(channelData: floatData, sampleRate: sampleRate, pointsPerSecond: pointsPerSecond, algorithm: algorithm, featureOptions: featureOptions, bitDepth: bitDepth, numberOfChannels: numberOfChannels)
+        return processChannelData(
+            channelData: floatData,
+            sampleRate: sampleRate,
+            segmentDurationMs: segmentDurationMs,
+            featureOptions: featureOptions,
+            bitDepth: bitDepth,
+            numberOfChannels: numberOfChannels
+        )
     }
     /// Processes the given audio channel data to extract features.
     /// - Parameters:
     ///   - channelData: The audio channel data to process.
     ///   - sampleRate: The sample rate of the audio data.
-    ///   - pointsPerSecond: The number of data points to extract per second (for features).
-    ///   - algorithm: The algorithm to use for feature extraction.
+    ///   - segmentDurationMs: The duration of each segment in milliseconds.
     ///   - featureOptions: The features to extract.
     ///   - bitDepth: The bit depth of the audio data.
     ///   - numberOfChannels: The number of channels in the audio data.
     /// - Returns: An `AudioAnalysisData` object containing the extracted features.
-    private func processChannelData(channelData: [Float], sampleRate: Float, pointsPerSecond: Int, algorithm: String, featureOptions: [String: Bool], bitDepth: Int, numberOfChannels: Int) -> AudioAnalysisData? {
-        Logger.debug("Processing audio data with sample rate: \(sampleRate), points per second: \(pointsPerSecond), algorithm: \(algorithm), bitDepth: \(bitDepth), numberOfChannels: \(numberOfChannels)")
+    private func processChannelData(
+        channelData: [Float],
+        sampleRate: Float,
+        segmentDurationMs: Int,
+        featureOptions: [String: Bool],
+        bitDepth: Int,
+        numberOfChannels: Int
+    ) -> AudioAnalysisData? {
+        Logger.debug("Processing audio data with sample rate: \(sampleRate), segmentDurationMs: \(segmentDurationMs), bitDepth: \(bitDepth), numberOfChannels: \(numberOfChannels)")
-        let startTime = CACurrentMediaTime() // Start the timer with high precision
+        let startTime = CACurrentMediaTime()
         let length = channelData.count
-        let pointInterval = Int(sampleRate) / pointsPerSecond
+        // Calculate points per segment based on segment duration
+        let samplesPerSegment = Int(Float(segmentDurationMs) * sampleRate / 1000.0)
         var dataPoints = [DataPoint]()
         var minAmplitude: Float = .greatestFiniteMagnitude
         var maxAmplitude: Float = -.greatestFiniteMagnitude
-        let durationMs = Float(length) / sampleRate * 1000
-        var sumSquares: Float = 0
-        var zeroCrossings = 0
-        var prevValue: Float = 0
-        var localMinAmplitude: Float = .greatestFiniteMagnitude
-        var localMaxAmplitude: Float = -.greatestFiniteMagnitude
-        var segmentData = [Float]()
-        var currentPosition = 0 // Track the current byte position
-        for i in 0..<length {
-            updateSegmentData(channelData: channelData, index: i, sumSquares: &sumSquares, zeroCrossings: &zeroCrossings, prevValue: &prevValue, localMinAmplitude: &localMinAmplitude, localMaxAmplitude: &localMaxAmplitude, segmentData: &segmentData)
+        // Calculate bytes per sample
+        let bytesPerSample = bitDepth / 8
+        // Process data in segments
+        var i = 0
+        while i < length {
+            let segmentEnd = min(i + samplesPerSegment, length)
+            let segment = Array(channelData[i..<segmentEnd])
-            if (i + 1) % pointInterval == 0 || i == length - 1 {
-                var features = computeFeatures(segmentData: segmentData, sampleRate: sampleRate, sumSquares: sumSquares, zeroCrossings: zeroCrossings, segmentLength: (i % pointInterval) + 1, featureOptions: featureOptions)
-                features.minAmplitude = localMinAmplitude
-                features.maxAmplitude = localMaxAmplitude
-                let rms = features.rms
-                let silent = rms < 0.01
-                let dB = featureOptions["dB"] == true ? 20 * log10(rms) : 0
-                minAmplitude = min(minAmplitude, localMinAmplitude)
-                maxAmplitude = max(maxAmplitude, localMaxAmplitude)
-                let segmentSize = segmentData.count
-                let segmentDuration = Float(segmentSize) / sampleRate
-                // Calculate start time and end time
-                let segmentStartTime = Float(i - segmentSize + 1) / sampleRate
-                let segmentEndTime = Float(i + 1) / sampleRate
-                // Calculate start position and end position in bytes
-               let bytesPerSample = bitDepth / 8
-               let startPosition = currentPosition
-               let endPosition = startPosition + (segmentSize * bytesPerSample * numberOfChannels)
-                dataPoints.append(DataPoint(
-                    id: uniqueIdCounter, // Assign unique ID
-                    amplitude: algorithm == "peak" ? localMaxAmplitude : rms,
-                    activeSpeech: nil,
-                    dB: dB,
-                    silent: silent,
-                    features: features,
-                    startTime: segmentStartTime,
-                    endTime: segmentEndTime,
-                    startPosition: startPosition,
-                    endPosition: endPosition,
-                    speaker: 0
-                ))
-                uniqueIdCounter += 1 // Increment the unique ID counter
-                resetSegmentData(&sumSquares, &zeroCrossings, &localMinAmplitude, &localMaxAmplitude, &segmentData)
-                // Update the current byte position
-                currentPosition = endPosition
-            }
+            // Calculate byte positions and timing
+            let startPosition = i * bytesPerSample * numberOfChannels
+            let endPosition = segmentEnd * bytesPerSample * numberOfChannels
+            let startTime = Float(i) / sampleRate
+            let endTime = Float(segmentEnd) / sampleRate
+            // Process segment and create data point
+            let dataPoint = processSegment(
+                segment,
+                sampleRate: sampleRate,
+                featureOptions: featureOptions,
+                startTime: startTime,
+                endTime: endTime,
+                startPosition: startPosition,
+                endPosition: endPosition
+            )
+            dataPoints.append(dataPoint)
+            // Update min/max amplitudes
+            minAmplitude = min(minAmplitude, segment.min() ?? minAmplitude)
+            maxAmplitude = max(maxAmplitude, segment.max() ?? maxAmplitude)
+            i += samplesPerSegment
         }
-        let endTime = CACurrentMediaTime() // End the timer with high precision
+        let endTime = CACurrentMediaTime()
         let processingTimeMs = Float((endTime - startTime) * 1000)
         Logger.debug("Processed \(dataPoints.count) data points in \(processingTimeMs) ms")
         return AudioAnalysisData(
-            pointsPerSecond: pointsPerSecond,
-            durationMs: Float(durationMs),
+            segmentDurationMs: segmentDurationMs,
+            durationMs: Int(Float(length) / sampleRate * 1000),
             bitDepth: bitDepth,
             numberOfChannels: numberOfChannels,
-            sampleRate: sampleRate,
-            samples: channelData.count,
+            sampleRate: Int(sampleRate),
+            samples: length,
             dataPoints: dataPoints,
-            amplitudeRange: (min: minAmplitude, max: maxAmplitude),
-            speakerChanges: [],
+            amplitudeRange: AudioAnalysisData.AmplitudeRange(
+                min: minAmplitude,
+                max: maxAmplitude
+            ),
+            rmsRange: AudioAnalysisData.AmplitudeRange(
+                min: 0,
+                max: 1
+            ),
+            speechAnalysis: nil,
             extractionTimeMs: processingTimeMs
         )
     }
-    private func updateSegmentData(channelData: [Float], index: Int, sumSquares: inout Float, zeroCrossings: inout Int, prevValue: inout Float, localMinAmplitude: inout Float, localMaxAmplitude: inout Float, segmentData: inout [Float]) {
-        let value = channelData[index]
-        sumSquares += value * value
-        if index > 0 && value * prevValue < 0 {
-            zeroCrossings += 1
-        }
-        prevValue = value
+    private func processSegment(
+        _ segment: [Float],
+        sampleRate: Float,
+        featureOptions: [String: Bool],
+        startTime: Float,
+        endTime: Float,
+        startPosition: Int,
+        endPosition: Int
+    ) -> DataPoint {
+        let sumSquares: Float = segment.reduce(0) { $0 + $1 * $1 }
+        let rms = sqrt(sumSquares / Float(segment.count))
+        let silent = rms < 0.01
+        let dB = Float(20 * log10(Double(rms)))
+        let features = computeFeatures(
+            segmentData: segment,
+            sampleRate: sampleRate,
+            sumSquares: sumSquares,
+            zeroCrossings: 0,
+            segmentLength: segment.count,
+            featureOptions: featureOptions
+        )
-        let absValue = abs(value)
-        localMinAmplitude = min(localMinAmplitude, absValue)
-        localMaxAmplitude = max(localMaxAmplitude, absValue)
-        segmentData.append(value)
+        let dataPoint = DataPoint(
+            id: Int(uniqueIdCounter),
+            amplitude: segment.max() ?? 0,
+            rms: rms,
+            dB: dB,
+            silent: silent,
+            features: features,
+            speech: SpeechFeatures(isActive: !silent),
+            startTime: startTime,
+            endTime: endTime,
+            startPosition: startPosition,
+            endPosition: endPosition,
+            samples: segment.count
+        )
+        uniqueIdCounter += 1
+        return dataPoint
     }
-    private func computeFeatures(segmentData: [Float], sampleRate: Float, sumSquares: Float, zeroCrossings: Int, segmentLength: Int, featureOptions: [String: Bool]) -> Features {
+    private func computeFeatures(
+        segmentData: [Float],
+        sampleRate: Float,
+        sumSquares: Float,
+        zeroCrossings: Int,
+        segmentLength: Int,
+        featureOptions: [String: Bool]
+    ) -> Features {
         let rms = sqrt(sumSquares / Float(segmentLength))
         let energy = featureOptions["energy"] == true ? sumSquares : 0
         let zcr = featureOptions["zcr"] == true ? Float(zeroCrossings) / Float(segmentLength) : 0
@@ -303,13 +419,24 @@ public class AudioProcessor {
         let chromagram = featureOptions["chromagram"] == true ? extractChromagram(from: segmentData, sampleRate: sampleRate) : []
         let tempo = featureOptions["tempo"] == true ? extractTempo(from: segmentData, sampleRate: sampleRate) : 0
         let hnr = featureOptions["hnr"] == true ? extractHNR(from: segmentData) : 0
+        let melSpectrogram = featureOptions["melSpectrogram"] == true ? computeMelSpectrogram(from: segmentData, sampleRate: sampleRate) : []
+        let spectralContrast = featureOptions["spectralContrast"] == true ? computeSpectralContrast(from: segmentData, sampleRate: sampleRate) : []
+        let tonnetz = featureOptions["tonnetz"] == true ? computeTonnetz(from: segmentData, sampleRate: sampleRate) : []
+        let pitch = featureOptions["pitch"] == true ? estimatePitch(from: segmentData, sampleRate: sampleRate) : 0
+        // Calculate min and max amplitudes from the segment data
+        let minAmplitude = segmentData.map(abs).min() ?? 0
+        let maxAmplitude = segmentData.map(abs).max() ?? 0
+        let crc32Value = featureOptions["crc32"] == true ?
+            calculateCRC32(from: segmentData, count: segmentData.count) : nil
         return Features(
             energy: energy,
             mfcc: mfcc,
             rms: rms,
-            minAmplitude: 0, // computed before and will be overwritten
-            maxAmplitude: 0, // computed before and will be overwritten
+            minAmplitude: minAmplitude,
+            maxAmplitude: maxAmplitude,
             zcr: zcr,
             spectralCentroid: spectralCentroid,
             spectralFlatness: spectralFlatness,
@@ -317,24 +444,20 @@ public class AudioProcessor {
             spectralBandwidth: spectralBandwidth,
             chromagram: chromagram,
             tempo: tempo,
-            hnr: hnr
+            hnr: hnr,
+            melSpectrogram: melSpectrogram,
+            spectralContrast: spectralContrast,
+            tonnetz: tonnetz,
+            pitch: pitch,
+            crc32: crc32Value
         )
     }
-    private func resetSegmentData(_ sumSquares: inout Float, _ zeroCrossings: inout Int, _ localMinAmplitude: inout Float, _ localMaxAmplitude: inout Float, _ segmentData: inout [Float]) {
-        sumSquares = 0
-        zeroCrossings = 0
-        localMinAmplitude = .greatestFiniteMagnitude
-        localMaxAmplitude = -.greatestFiniteMagnitude
-        segmentData.removeAll()
-    }
     /// Processes audio data with time range support
     public func processAudioData(
         startTimeMs: Double? = nil,
         endTimeMs: Double? = nil,
-        pointsPerSecond: Int? = nil,
-        algorithm: String,
+        segmentDurationMs: Int = 100, // Default 100ms
         featureOptions: [String: Bool]
     ) -> AudioAnalysisData? {
         guard let audioFile = audioFile else {
@@ -358,9 +481,8 @@ public class AudioProcessor {
             return nil
         }
-        // Calculate frames per buffer based on points per second
-        let actualPointsPerSecond = pointsPerSecond ?? 20
-        let framesPerBuffer = AVAudioFrameCount((endFrame - startFrame) / Int64(actualPointsPerSecond))
+        // Calculate frames per buffer based on segment duration
+        let framesPerBuffer = AVAudioFrameCount(Float(sampleRate) * Float(segmentDurationMs) / 1000.0)
         guard let buffer = AVAudioPCMBuffer(pcmFormat: audioFile.processingFormat, frameCapacity: framesPerBuffer) else {
             Logger.debug("Failed to create buffer")
@@ -399,20 +521,18 @@ public class AudioProcessor {
                     summedData[i] /= Float(numberOfChannels)
                 }
-                // Calculate amplitude based on algorithm
-                let amplitude: Float
-                if algorithm.lowercased() == "peak" {
-                    var localMax: Float = 0
-                    vDSP_maxmgv(summedData, 1, &localMax, vDSP_Length(framesToRead))
-                    amplitude = localMax
-                } else {
-                    var rms: Float = 0
-                    vDSP_rmsqv(summedData, 1, &rms, vDSP_Length(framesToRead))
-                    amplitude = rms
-                }
+                // Calculate both peak amplitude and RMS
+                var localMax: Float = 0
+                var rms: Float = 0
+                vDSP_maxmgv(summedData, 1, &localMax, vDSP_Length(framesToRead))
+                // Calculate RMS using vDSP
+                var meanSquare: Float = 0
+                vDSP_measqv(summedData, 1, &meanSquare, vDSP_Length(framesToRead))
+                rms = sqrt(meanSquare)
-                minAmplitude = min(minAmplitude, amplitude)
-                maxAmplitude = max(maxAmplitude, amplitude)
+                minAmplitude = min(minAmplitude, localMax)
+                maxAmplitude = max(maxAmplitude, localMax)
                 // Create data point
                 let startTime = Float(currentFrame) / Float(sampleRate)
@@ -420,11 +540,24 @@ public class AudioProcessor {
                 let dataPoint = DataPoint(
                     id: currentId,
-                    amplitude: amplitude,
+                    amplitude: localMax,      // Always use peak amplitude
+                    rms: rms,                // Use calculated RMS value
+                    dB: Float(20 * log10(Double(rms))),  // Use RMS for dB calculation
+                    silent: rms < 0.01,      // Use RMS for silence detection
+                    features: computeFeatures(
+                        segmentData: Array(UnsafeBufferPointer(start: summedData, count: Int(framesToRead))),
+                        sampleRate: sampleRate,
+                        sumSquares: rms * rms,
+                        zeroCrossings: 0,
+                        segmentLength: Int(framesToRead),
+                        featureOptions: featureOptions
+                    ),
+                    speech: SpeechFeatures(isActive: rms >= 0.01),
                     startTime: startTime,
                     endTime: endTime,
                     startPosition: Int(currentFrame),
-                    endPosition: Int(currentFrame + Int64(framesToRead))
+                    endPosition: Int(currentFrame + Int64(framesToRead)),
+                    samples: Int(framesToRead)
                 )
                 dataPoints.append(dataPoint)
@@ -441,51 +574,43 @@ public class AudioProcessor {
         let extractionTime = Float(endTime - startTime) * 1000 // Convert to milliseconds
         return AudioAnalysisData(
-            pointsPerSecond: actualPointsPerSecond,
-            durationMs: Float(endFrame - startFrame) * 1000 / Float(sampleRate),
+            segmentDurationMs: segmentDurationMs,
+            durationMs: Int(Float(endFrame - startFrame) * 1000 / sampleRate),
             bitDepth: bitDepth,
             numberOfChannels: numberOfChannels,
-            sampleRate: sampleRate,
+            sampleRate: Int(sampleRate),
             samples: Int(endFrame - startFrame),
             dataPoints: dataPoints,
-            amplitudeRange: (min: minAmplitude, max: maxAmplitude),
+            amplitudeRange: AudioAnalysisData.AmplitudeRange(
+                min: minAmplitude,
+                max: maxAmplitude
+            ),
+            rmsRange: AudioAnalysisData.AmplitudeRange(
+                min: 0,
+                max: 1
+            ),
+            speechAnalysis: nil,
             extractionTimeMs: extractionTime
         )
     }
-    private func calculateZeroCrossingRate(_ data: [Float]) -> Float {
-        var count: Float = 0
-        for i in 1..<data.count {
-            if (data[i] >= 0 && data[i-1] < 0) || (data[i] < 0 && data[i-1] >= 0) {
-                count += 1
-            }
-        }
-        return count / Float(data.count)
-    }
-    private func calculateEnergy(_ data: [Float]) -> Float {
-        var energy: Float = 0
-        vDSP_svesq(data, 1, &energy, vDSP_Length(data.count))
-        return energy / Float(data.count)
-    }
     /// Trims audio file to specified range
     public func trimAudio(
         startTimeMs: Double,
         endTimeMs: Double,
         outputFormat: [String: Any]?
     ) -> TrimResult? {
-        guard let audioFile = audioFile else {
+        guard let currentAudioFile = audioFile else {
             Logger.debug("No audio file loaded")
             return nil
         }
-        let sampleRate = audioFile.fileFormat.sampleRate
+        let sampleRate = currentAudioFile.fileFormat.sampleRate
         let startFrame = AVAudioFramePosition(startTimeMs * sampleRate / 1000.0)
         let endFrame = AVAudioFramePosition(endTimeMs * sampleRate / 1000.0)
         // Create output format
-        let outputSettings = createOutputSettings(from: outputFormat, originalFormat: audioFile.fileFormat)
+        let outputSettings = createOutputSettings(from: outputFormat, originalFormat: currentAudioFile.fileFormat)
         // Create temporary output file
         let outputURL = FileManager.default.temporaryDirectory
@@ -503,11 +628,11 @@ public class AudioProcessor {
             // Read and write in chunks
             let bufferSize = 32768
             let buffer = AVAudioPCMBuffer(
-                pcmFormat: audioFile.processingFormat,
+                pcmFormat: currentAudioFile.processingFormat,
                 frameCapacity: AVAudioFrameCount(bufferSize)
             )!
-            audioFile.framePosition = startFrame
+            currentAudioFile.framePosition = startFrame
             var currentFrame = startFrame
             while currentFrame < endFrame {
@@ -516,7 +641,7 @@ public class AudioProcessor {
                     AVAudioFrameCount(endFrame - currentFrame)
                 )
-                try audioFile.read(into: buffer, frameCount: framesToRead)
+                try currentAudioFile.read(into: buffer, frameCount: framesToRead)
                 try outputFile.write(from: buffer)
                 currentFrame += Int64(framesToRead)
@@ -526,12 +651,18 @@ public class AudioProcessor {
             let attributes = try FileManager.default.attributesOfItem(atPath: outputURL.path)
             let fileSize = attributes[.size] as! Int64
-            return TrimResult(
+            // After successful trim, update the class property
+            audioFile = try AVAudioFile(forReading: outputURL)
+            // After successful trim, create the result
+            let trimmedDuration = (endTimeMs - startTimeMs) / 1000.0 // Convert to seconds
+            let result = TrimResult(
                 uri: outputURL.absoluteString,
-                duration: Double(endFrame - startFrame) / sampleRate,
+                duration: trimmedDuration, // Use actual trimmed duration
                 size: fileSize
             )
+            return result
         } catch {
             Logger.debug("Error trimming audio: \(error)")
             return nil
@@ -561,10 +692,14 @@ public class AudioProcessor {
     ///   - numberOfPoints: The number of points to extract
     ///   - startTimeMs: Optional start time in milliseconds
     ///   - endTimeMs: Optional end time in milliseconds
-    ///   - algorithm: The algorithm to use for feature extraction
     ///   - featureOptions: The features to extract
     /// - Returns: An `AudioAnalysisData` object containing the extracted features
-    public func extractPreview(numberOfPoints: Int, startTimeMs: Double? = nil, endTimeMs: Double? = nil, algorithm: String, featureOptions: [String: Bool]) -> AudioAnalysisData? {
+    public func extractPreview(
+        numberOfPoints: Int,
+        startTimeMs: Double? = nil,
+        endTimeMs: Double? = nil,
+        featureOptions: [String: Bool]
+    ) -> AudioAnalysisData? {
         guard let audioFile = audioFile else {
             reject("FILE_NOT_INITIALIZED", "Audio file is not initialized.")
             return nil
@@ -576,9 +711,9 @@ public class AudioProcessor {
         // Calculate effective time range
         let effectiveStartMs = startTimeMs ?? 0.0
         let effectiveEndMs = min(endTimeMs ?? totalDurationMs, totalDurationMs)
-        let durationMs = effectiveEndMs - effectiveStartMs
+        let durationMs = effectiveEndMs - effectiveStartMs // This is the actual duration we want to use
-        // Convert time to frames
+        // Convert time to frames with proper offset
         let startFrame = AVAudioFramePosition(effectiveStartMs * Double(sampleRate) / 1000.0)
         let endFrame = AVAudioFramePosition(effectiveEndMs * Double(sampleRate) / 1000.0)
         let samplesInRange = Int(endFrame - startFrame)
@@ -596,11 +731,19 @@ public class AudioProcessor {
         var minAmplitude: Float = .greatestFiniteMagnitude
         var maxAmplitude: Float = -.greatestFiniteMagnitude
+        let bytesPerSample = audioFile.fileFormat.settings[AVLinearPCMBitDepthKey] as? Int ?? 16 / 8
         for i in 0..<numberOfPoints {
             let pointStartFrame = startFrame + Int64(i * samplesPerPoint)
             let pointEndFrame = startFrame + Int64((i + 1) * samplesPerPoint)
             let framesToRead = AVAudioFrameCount(pointEndFrame - pointStartFrame)
+            // Calculate byte positions
+            let startPosition = Int(pointStartFrame) * bytesPerSample * Int(audioFile.fileFormat.channelCount)
+            let endPosition = Int(pointEndFrame) * bytesPerSample * Int(audioFile.fileFormat.channelCount)
+            let segmentStartTime = Float(pointStartFrame) / sampleRate
+            let segmentEndTime = Float(pointEndFrame) / sampleRate
             do {
                 audioFile.framePosition = pointStartFrame
                 let buffer = AVAudioPCMBuffer(pcmFormat: audioFile.processingFormat, frameCapacity: framesToRead)!
@@ -637,24 +780,23 @@ public class AudioProcessor {
                 let rms = features.rms
                 let silent = rms < 0.01
-                let dB = featureOptions["dB"] == true ? 20 * log10(rms) : 0
-                let segmentStartTime = Float(pointStartFrame) / sampleRate
-                let segmentEndTime = Float(pointEndFrame) / sampleRate
+                let dB = Float(20 * log10(Double(rms)))
-                dataPoints.append(DataPoint(
-                    id: uniqueIdCounter,
-                    amplitude: algorithm == "peak" ? localMaxAmplitude : rms,
-                    activeSpeech: nil,
+                let dataPoint = DataPoint(
+                    id: Int(uniqueIdCounter),
+                    amplitude: localMaxAmplitude,
+                    rms: rms,
                     dB: dB,
                     silent: silent,
                     features: features,
+                    speech: SpeechFeatures(isActive: !silent),
                     startTime: segmentStartTime,
                     endTime: segmentEndTime,
-                    startPosition: Int(pointStartFrame),
-                    endPosition: Int(pointEndFrame),
-                    speaker: 0
-                ))
+                    startPosition: startPosition,
+                    endPosition: endPosition,
+                    samples: Int(framesToRead)
+                )
+                dataPoints.append(dataPoint)
                 uniqueIdCounter += 1
                 minAmplitude = min(minAmplitude, localMinAmplitude)
@@ -694,15 +836,22 @@ public class AudioProcessor {
         """)
         return AudioAnalysisData(
-            pointsPerSecond: numberOfPoints,
-            durationMs: Float(durationMs),
+            segmentDurationMs: 100, // Default 100ms
+            durationMs: Int(durationMs), // Use actual duration of trimmed section
             bitDepth: bitDepth,
             numberOfChannels: numberOfChannels,
-            sampleRate: sampleRate,
+            sampleRate: Int(sampleRate),
             samples: samplesInRange,
             dataPoints: dataPoints,
-            amplitudeRange: (min: minAmplitude, max: maxAmplitude),
-            speakerChanges: [],
+            amplitudeRange: AudioAnalysisData.AmplitudeRange(
+                min: minAmplitude,
+                max: maxAmplitude
+            ),
+            rmsRange: AudioAnalysisData.AmplitudeRange(
+                min: 0,
+                max: 1
+            ),
+            speechAnalysis: nil,
             extractionTimeMs: extractionTimeMs
         )
     }