@siteed/expo-audio-stream 1.12.2 → 1.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -2
- package/android/src/main/java/net/siteed/audiostream/AudioProcessor.kt +866 -70
- package/android/src/main/java/net/siteed/audiostream/AudioRecorderManager.kt +4 -0
- package/android/src/main/java/net/siteed/audiostream/AudioRecordingService.kt +30 -9
- package/android/src/main/java/net/siteed/audiostream/ExpoAudioStreamModule.kt +163 -24
- package/build/AudioAnalysis/AudioAnalysis.types.d.ts +62 -0
- package/build/AudioAnalysis/AudioAnalysis.types.d.ts.map +1 -1
- package/build/AudioAnalysis/AudioAnalysis.types.js.map +1 -1
- package/build/AudioAnalysis/extractAudioAnalysis.d.ts +10 -1
- package/build/AudioAnalysis/extractAudioAnalysis.d.ts.map +1 -1
- package/build/AudioAnalysis/extractAudioAnalysis.js +158 -0
- package/build/AudioAnalysis/extractAudioAnalysis.js.map +1 -1
- package/build/index.d.ts +3 -2
- package/build/index.d.ts.map +1 -1
- package/build/index.js +2 -2
- package/build/index.js.map +1 -1
- package/build/useAudioRecorder.d.ts.map +1 -1
- package/build/useAudioRecorder.js +35 -16
- package/build/useAudioRecorder.js.map +1 -1
- package/ios/AudioProcessor.swift +391 -1
- package/ios/ExpoAudioStreamModule.swift +100 -0
- package/ios/Features.swift +30 -0
- package/package.json +1 -1
- package/plugin/build/index.d.ts +0 -1
- package/plugin/build/index.js +0 -5
- package/plugin/src/index.ts +0 -6
- package/src/AudioAnalysis/AudioAnalysis.types.ts +66 -0
- package/src/AudioAnalysis/extractAudioAnalysis.ts +219 -0
- package/src/index.ts +12 -1
- package/src/useAudioRecorder.tsx +37 -16
package/ios/AudioProcessor.swift
CHANGED
|
@@ -5,6 +5,18 @@ import Accelerate
|
|
|
5
5
|
import AVFoundation
|
|
6
6
|
import QuartzCore
|
|
7
7
|
|
|
8
|
+
/// Value returned by a trim operation: where the new file is and how large it is.
public struct TrimResult {
    /// URI string of the trimmed audio file.
    public let uri: String
    /// Length of the trimmed audio (`trimAudio` fills this with frames / sample rate, i.e. seconds).
    public let duration: Double
    /// Size of the trimmed file as reported by the file system.
    public let size: Int64

    /// Creates a result from its three components, stored unchanged.
    public init(uri: String, duration: Double, size: Int64) {
        self.size = size
        self.duration = duration
        self.uri = uri
    }
}
|
|
19
|
+
|
|
8
20
|
public class AudioProcessor {
|
|
9
21
|
public private(set) var audioFile: AVAudioFile?
|
|
10
22
|
private var result: (Any) -> Void
|
|
@@ -252,7 +264,7 @@ public class AudioProcessor {
|
|
|
252
264
|
|
|
253
265
|
return AudioAnalysisData(
|
|
254
266
|
pointsPerSecond: pointsPerSecond,
|
|
255
|
-
durationMs: durationMs,
|
|
267
|
+
durationMs: Float(durationMs),
|
|
256
268
|
bitDepth: bitDepth,
|
|
257
269
|
numberOfChannels: numberOfChannels,
|
|
258
270
|
sampleRate: sampleRate,
|
|
@@ -316,4 +328,382 @@ public class AudioProcessor {
|
|
|
316
328
|
localMaxAmplitude = -.greatestFiniteMagnitude
|
|
317
329
|
segmentData.removeAll()
|
|
318
330
|
}
|
|
331
|
+
|
|
332
|
+
/// Computes one amplitude data point per fixed-size chunk of the loaded audio file,
/// optionally restricted to a time range.
///
/// All channels of a chunk are averaged into a single signal before the amplitude
/// is measured.
///
/// - Parameters:
///   - startTimeMs: Start of the range in milliseconds; `nil` means the start of the file.
///   - endTimeMs: End of the range in milliseconds; `nil` means the end of the file.
///   - pointsPerSecond: Value used to size the chunks (defaults to 20). Must be positive.
///   - algorithm: `"peak"` (case-insensitive) reports the largest magnitude of each chunk;
///     any other value reports the RMS.
///   - featureOptions: Not read by this method.
/// - Returns: The analysis of the range, or `nil` when no file is loaded, the time range or
///   `pointsPerSecond` is invalid, the read buffer cannot be created or holds no float
///   data, or reading the file fails.
public func processAudioData(
    startTimeMs: Double? = nil,
    endTimeMs: Double? = nil,
    pointsPerSecond: Int? = nil,
    algorithm: String,
    featureOptions: [String: Bool]
) -> AudioAnalysisData? {
    guard let audioFile = audioFile else {
        Logger.debug("No audio file loaded")
        return nil
    }

    let extractionStart = CACurrentMediaTime()
    let sampleRate = Float(audioFile.fileFormat.sampleRate)
    let bitDepth = audioFile.fileFormat.settings[AVLinearPCMBitDepthKey] as? Int ?? 16
    let numberOfChannels = Int(audioFile.fileFormat.channelCount)

    // Converts milliseconds to a frame index (truncating toward zero). A missing value
    // maps to `fallback`; NaN, infinite or out-of-range values map to `nil` instead of
    // trapping in the integer conversion.
    func frame(forMs milliseconds: Double?, fallback: AVAudioFramePosition) -> AVAudioFramePosition? {
        guard let milliseconds = milliseconds else { return fallback }
        let frames = milliseconds * Double(sampleRate) / 1000.0
        return AVAudioFramePosition(exactly: frames.rounded(.towardZero))
    }

    // Validate frame range
    guard let startFrame = frame(forMs: startTimeMs, fallback: 0),
          let endFrame = frame(forMs: endTimeMs, fallback: audioFile.length),
          startFrame >= 0, endFrame <= audioFile.length, startFrame < endFrame else {
        Logger.debug("Invalid time range")
        return nil
    }

    // Zero would divide by zero and a negative value would produce a negative frame count.
    let actualPointsPerSecond = pointsPerSecond ?? 20
    guard actualPointsPerSecond > 0 else {
        Logger.debug("Invalid pointsPerSecond: \(actualPointsPerSecond)")
        return nil
    }

    // NOTE(review): the chunk size is the range length divided by pointsPerSecond, which
    // yields roughly `pointsPerSecond` points for the whole range rather than per second of
    // audio. Kept as-is because callers are not visible here — confirm the intent.
    // At least one frame per chunk, otherwise the read loop below could never advance.
    let framesPerBuffer = AVAudioFrameCount(
        clamping: max(1, (endFrame - startFrame) / Int64(actualPointsPerSecond))
    )

    guard let buffer = AVAudioPCMBuffer(pcmFormat: audioFile.processingFormat, frameCapacity: framesPerBuffer) else {
        Logger.debug("Failed to create buffer")
        return nil
    }

    // Index channels by what the buffer actually holds, not by the file format.
    let bufferChannelCount = Int(buffer.format.channelCount)
    guard bufferChannelCount > 0 else {
        Logger.debug("Audio buffer has no channels")
        return nil
    }

    let usePeak = algorithm.lowercased() == "peak"
    var dataPoints: [DataPoint] = []
    var minAmplitude: Float = .greatestFiniteMagnitude
    var maxAmplitude: Float = -.greatestFiniteMagnitude
    var currentId = 0

    audioFile.framePosition = startFrame
    var currentFrame = startFrame

    while currentFrame < endFrame {
        let framesToRead = min(framesPerBuffer, AVAudioFrameCount(clamping: endFrame - currentFrame))

        do {
            try audioFile.read(into: buffer, frameCount: framesToRead)
        } catch {
            Logger.debug("Error reading audio data: \(error)")
            return nil
        }

        // Work with what was actually delivered; stop if the file ran dry early.
        let framesRead = Int(buffer.frameLength)
        guard framesRead > 0 else {
            Logger.debug("Audio file ended before the requested range was read")
            break
        }

        // The buffer format does not change between reads, so skipping would never help.
        guard let channelData = buffer.floatChannelData else {
            Logger.debug("Audio buffer has no float channel data")
            return nil
        }

        // Sum every channel sample-by-sample, then average across channels.
        var mixed = [Float](repeating: 0, count: framesRead)
        for channel in 0..<bufferChannelCount {
            let samples = UnsafeBufferPointer(start: channelData[channel], count: framesRead)
            for (index, sample) in samples.enumerated() {
                mixed[index] += sample
            }
        }
        let channelDivisor = Float(bufferChannelCount)
        for index in mixed.indices {
            mixed[index] /= channelDivisor
        }

        // Calculate amplitude based on algorithm
        var amplitude: Float = 0
        if usePeak {
            vDSP_maxmgv(mixed, 1, &amplitude, vDSP_Length(framesRead))
        } else {
            vDSP_rmsqv(mixed, 1, &amplitude, vDSP_Length(framesRead))
        }

        minAmplitude = min(minAmplitude, amplitude)
        maxAmplitude = max(maxAmplitude, amplitude)

        // Divide in Double: Float cannot represent large frame indices exactly.
        let chunkEndFrame = currentFrame + Int64(framesRead)
        let segmentStart = Float(Double(currentFrame) / Double(sampleRate))
        let segmentEnd = Float(Double(chunkEndFrame) / Double(sampleRate))

        dataPoints.append(DataPoint(
            id: currentId,
            amplitude: amplitude,
            startTime: segmentStart,
            endTime: segmentEnd,
            startPosition: Int(currentFrame),
            endPosition: Int(chunkEndFrame)
        ))
        currentId += 1

        currentFrame = chunkEndFrame
    }

    let extractionTime = Float(CACurrentMediaTime() - extractionStart) * 1000 // Convert to milliseconds

    return AudioAnalysisData(
        pointsPerSecond: actualPointsPerSecond,
        durationMs: Float(endFrame - startFrame) * 1000 / Float(sampleRate),
        bitDepth: bitDepth,
        numberOfChannels: numberOfChannels,
        sampleRate: sampleRate,
        samples: Int(endFrame - startFrame),
        dataPoints: dataPoints,
        amplitudeRange: (min: minAmplitude, max: maxAmplitude),
        extractionTimeMs: extractionTime
    )
}
|
|
455
|
+
|
|
456
|
+
/// Measures how often the signal changes sign between neighbouring samples.
///
/// A crossing is counted when one sample is `< 0` and its neighbour is `>= 0`.
///
/// - Parameter data: Samples to scan.
/// - Returns: Number of crossings divided by `data.count`; `0` when fewer than two
///   samples are supplied.
private func calculateZeroCrossingRate(_ data: [Float]) -> Float {
    // `1..<data.count` traps on an empty array, and a single sample cannot cross anything.
    guard data.count > 1 else { return 0 }

    var crossings = 0
    for (previous, current) in zip(data, data.dropFirst()) {
        if (current >= 0 && previous < 0) || (current < 0 && previous >= 0) {
            crossings += 1
        }
    }
    return Float(crossings) / Float(data.count)
}
|
|
465
|
+
|
|
466
|
+
/// Mean of the squared samples.
///
/// - Parameter data: Samples to measure.
/// - Returns: Sum of squares divided by `data.count`; `0` for an empty array
///   (the unguarded division would otherwise produce NaN).
private func calculateEnergy(_ data: [Float]) -> Float {
    guard !data.isEmpty else { return 0 }

    var sumOfSquares: Float = 0
    vDSP_svesq(data, 1, &sumOfSquares, vDSP_Length(data.count))
    return sumOfSquares / Float(data.count)
}
|
|
471
|
+
|
|
472
|
+
/// Copies the frames between two timestamps of the loaded file into a new `.wav`
/// file in the temporary directory.
///
/// - Parameters:
///   - startTimeMs: Start of the range to keep, in milliseconds. Must not be negative.
///   - endTimeMs: End of the range to keep, in milliseconds. Values past the end of
///     the file are clamped to the file length.
///   - outputFormat: Optional overrides forwarded to `createOutputSettings`.
/// - Returns: URI, duration in seconds and size of the new file, or `nil` when no file
///   is loaded, the range is empty or invalid, or reading/writing fails. On failure
///   the partially written output file is removed.
public func trimAudio(
    startTimeMs: Double,
    endTimeMs: Double,
    outputFormat: [String: Any]?
) -> TrimResult? {
    guard let audioFile = audioFile else {
        Logger.debug("No audio file loaded")
        return nil
    }

    let sampleRate = audioFile.fileFormat.sampleRate

    // `init(exactly:)` yields nil for NaN/infinite/out-of-range input instead of trapping.
    guard let startFrame = AVAudioFramePosition(exactly: (startTimeMs * sampleRate / 1000.0).rounded(.towardZero)),
          let requestedEndFrame = AVAudioFramePosition(exactly: (endTimeMs * sampleRate / 1000.0).rounded(.towardZero)) else {
        Logger.debug("Invalid time range")
        return nil
    }
    let endFrame = min(requestedEndFrame, audioFile.length)
    guard startFrame >= 0, startFrame < endFrame else {
        Logger.debug("Invalid time range")
        return nil
    }

    // Create output format
    // NOTE(review): the read buffer uses the input processing format; if `outputFormat`
    // requests a different sample rate or channel count the write presumably fails — confirm.
    let outputSettings = createOutputSettings(from: outputFormat, originalFormat: audioFile.fileFormat)

    let chunkCapacity: AVAudioFrameCount = 32768
    guard let buffer = AVAudioPCMBuffer(pcmFormat: audioFile.processingFormat, frameCapacity: chunkCapacity) else {
        Logger.debug("Failed to create buffer")
        return nil
    }

    // Create temporary output file
    let outputURL = FileManager.default.temporaryDirectory
        .appendingPathComponent(UUID().uuidString)
        .appendingPathExtension("wav")

    // Do not leave a half-written file behind when anything below fails.
    var succeeded = false
    defer {
        if !succeeded {
            try? FileManager.default.removeItem(at: outputURL)
        }
    }

    do {
        var framesWritten: Int64 = 0

        // Inner scope so the writer is released before the file size is queried.
        do {
            let outputFile = try AVAudioFile(
                forWriting: outputURL,
                settings: outputSettings,
                commonFormat: .pcmFormatFloat32,
                interleaved: false
            )

            // Read and write in chunks
            audioFile.framePosition = startFrame
            var currentFrame = startFrame

            while currentFrame < endFrame {
                // Take the minimum in Int64 first so the UInt32 conversion cannot overflow.
                let framesToRead = AVAudioFrameCount(min(Int64(chunkCapacity), endFrame - currentFrame))

                try audioFile.read(into: buffer, frameCount: framesToRead)
                guard buffer.frameLength > 0 else { break } // no more data available

                try outputFile.write(from: buffer)
                currentFrame += Int64(buffer.frameLength)
            }
            framesWritten = currentFrame - startFrame
        }

        guard framesWritten > 0 else {
            Logger.debug("Error trimming audio: no frames could be read")
            return nil
        }

        // Get file size
        let attributes = try FileManager.default.attributesOfItem(atPath: outputURL.path)
        guard let fileSize = (attributes[.size] as? NSNumber)?.int64Value else {
            Logger.debug("Error trimming audio: output file size unavailable")
            return nil
        }

        succeeded = true
        return TrimResult(
            uri: outputURL.absoluteString,
            duration: Double(framesWritten) / sampleRate,
            size: fileSize
        )
    } catch {
        Logger.debug("Error trimming audio: \(error)")
        return nil
    }
}
|
|
540
|
+
|
|
541
|
+
/// Builds the settings dictionary for a linear-PCM output file.
///
/// - Parameters:
///   - options: Optional overrides. `sampleRate` (Double), `channels` (Int) and
///     `bitDepth` (Int) are honoured when present with those types.
///   - originalFormat: Supplies the sample rate and channel count used when no override is given.
/// - Returns: Settings describing integer, little-endian, interleaved linear PCM;
///   the bit depth falls back to 16.
private func createOutputSettings(
    from options: [String: Any]?,
    originalFormat: AVAudioFormat
) -> [String: Any] {
    let sampleRate: Double
    if let requestedRate = options?["sampleRate"] as? Double {
        sampleRate = requestedRate
    } else {
        sampleRate = originalFormat.sampleRate
    }

    // The override is an Int while the fallback keeps the format's own channel-count type.
    let channels: Any
    if let requestedChannels = options?["channels"] as? Int {
        channels = requestedChannels
    } else {
        channels = originalFormat.channelCount
    }

    let bitDepth: Int
    if let requestedDepth = options?["bitDepth"] as? Int {
        bitDepth = requestedDepth
    } else {
        bitDepth = 16
    }

    return [
        AVFormatIDKey: kAudioFormatLinearPCM,
        AVSampleRateKey: sampleRate,
        AVNumberOfChannelsKey: channels,
        AVLinearPCMBitDepthKey: bitDepth,
        AVLinearPCMIsFloatKey: false,
        AVLinearPCMIsBigEndianKey: false,
        AVLinearPCMIsNonInterleaved: false
    ]
}
|
|
558
|
+
|
|
559
|
+
/// Extracts a fixed number of evenly sized data points from a time range of the loaded file.
///
/// Only the first channel of the file is analysed. On every failure the processor's
/// `reject` callback is invoked before `nil` is returned.
///
/// - Parameters:
///   - numberOfPoints: Number of points to extract; must be positive. When the range
///     holds fewer samples than this, one point per sample is produced instead.
///   - startTimeMs: Optional start time in milliseconds (negative values are treated as 0).
///   - endTimeMs: Optional end time in milliseconds (clamped to the file duration).
///   - algorithm: `"peak"` (case-insensitive) reports the largest magnitude of each
///     point; any other value reports the RMS from the computed features.
///   - featureOptions: Forwarded to `computeFeatures`; the `"dB"` entry also enables the dB value.
/// - Returns: An `AudioAnalysisData` object containing the extracted points, or `nil` on failure.
public func extractPreview(numberOfPoints: Int, startTimeMs: Double? = nil, endTimeMs: Double? = nil, algorithm: String, featureOptions: [String: Bool]) -> AudioAnalysisData? {
    guard let audioFile = audioFile else {
        reject("FILE_NOT_INITIALIZED", "Audio file is not initialized.")
        return nil
    }

    // Zero would divide by zero below and a negative count would make `0..<n` trap.
    guard numberOfPoints > 0 else {
        reject("INVALID_ARGUMENTS", "numberOfPoints must be greater than zero")
        return nil
    }

    // Start timing before any work so extractionTimeMs covers the extraction itself.
    let extractionStart = CACurrentMediaTime()

    let sampleRate = Float(audioFile.fileFormat.sampleRate)
    let totalDurationMs = Double(audioFile.length) / Double(sampleRate) * 1000

    // Calculate effective time range
    let effectiveStartMs = max(startTimeMs ?? 0.0, 0.0)
    let effectiveEndMs = min(endTimeMs ?? totalDurationMs, totalDurationMs)
    let durationMs = effectiveEndMs - effectiveStartMs

    // Rejects empty, reversed, NaN and infinite ranges before any integer conversion.
    guard durationMs.isFinite, durationMs > 0 else {
        reject("INVALID_RANGE", "Invalid sample range: contains no samples")
        return nil
    }

    // Convert time to frames
    let startFrame = AVAudioFramePosition(effectiveStartMs * Double(sampleRate) / 1000.0)
    let endFrame = min(AVAudioFramePosition(effectiveEndMs * Double(sampleRate) / 1000.0), audioFile.length)
    let samplesInRange = Int(endFrame - startFrame)

    guard samplesInRange > 0 else {
        reject("INVALID_RANGE", "Invalid sample range: contains no samples")
        return nil
    }

    // Never produce more points than there are samples, so every point reads at least one frame.
    let pointCount = min(numberOfPoints, samplesInRange)
    let samplesPerPoint = samplesInRange / pointCount

    let bitDepth = audioFile.fileFormat.settings[AVLinearPCMBitDepthKey] as? Int ?? 16
    let numberOfChannels = Int(audioFile.processingFormat.channelCount)

    NSLog("%@", """
    [AudioProcessor] Starting preview extraction:
    - numberOfPoints: \(numberOfPoints)
    - startTimeMs: \(String(describing: startTimeMs))
    - endTimeMs: \(String(describing: endTimeMs))
    - durationMs: \(durationMs)
    - sampleRate: \(sampleRate)
    - bitDepth: \(bitDepth)
    - channels: \(numberOfChannels)
    - samplesInRange: \(samplesInRange)
    - samplesPerPoint: \(samplesPerPoint)
    """)

    // Every point reads the same number of frames, so one buffer can be reused.
    guard let framesPerPoint = AVAudioFrameCount(exactly: samplesPerPoint),
          let buffer = AVAudioPCMBuffer(pcmFormat: audioFile.processingFormat, frameCapacity: framesPerPoint) else {
        reject("BUFFER_ERROR", "Failed to create audio buffer")
        return nil
    }

    let usePeak = algorithm.lowercased() == "peak"
    var dataPoints = [DataPoint]()
    dataPoints.reserveCapacity(pointCount)

    var minAmplitude: Float = .greatestFiniteMagnitude
    var maxAmplitude: Float = -.greatestFiniteMagnitude

    for pointIndex in 0..<pointCount {
        let pointStartFrame = startFrame + Int64(pointIndex * samplesPerPoint)
        let pointEndFrame = pointStartFrame + Int64(samplesPerPoint)

        do {
            audioFile.framePosition = pointStartFrame
            try audioFile.read(into: buffer, frameCount: framesPerPoint)
        } catch {
            reject("AUDIO_READ_ERROR", "Error reading audio data: \(error.localizedDescription)")
            return nil
        }

        // Only look at frames that were actually delivered by the read.
        let frameCount = Int(buffer.frameLength)
        guard frameCount > 0, let floatData = buffer.floatChannelData else { continue }

        let samples = UnsafeBufferPointer(start: floatData[0], count: frameCount)

        var sumSquares: Float = 0
        var zeroCrossings = 0
        var prevValue: Float = 0
        var localMinAmplitude: Float = .greatestFiniteMagnitude
        var localMaxAmplitude: Float = -.greatestFiniteMagnitude

        // Process samples for this point
        for (offset, value) in samples.enumerated() {
            sumSquares += value * value
            // A negative product means the two neighbours have opposite signs.
            if offset > 0 && value * prevValue < 0 {
                zeroCrossings += 1
            }
            prevValue = value

            let magnitude = abs(value)
            localMinAmplitude = min(localMinAmplitude, magnitude)
            localMaxAmplitude = max(localMaxAmplitude, magnitude)
        }

        let features = computeFeatures(segmentData: Array(samples),
                                       sampleRate: sampleRate,
                                       sumSquares: sumSquares,
                                       zeroCrossings: zeroCrossings,
                                       segmentLength: frameCount,
                                       featureOptions: featureOptions)

        let rms = features.rms
        let silent = rms < 0.01
        let dB = featureOptions["dB"] == true ? 20 * log10(rms) : 0

        let segmentStartTime = Float(pointStartFrame) / sampleRate
        let segmentEndTime = Float(pointEndFrame) / sampleRate

        dataPoints.append(DataPoint(
            id: uniqueIdCounter,
            amplitude: usePeak ? localMaxAmplitude : rms,
            activeSpeech: nil,
            dB: dB,
            silent: silent,
            features: features,
            startTime: segmentStartTime,
            endTime: segmentEndTime,
            startPosition: Int(pointStartFrame),
            endPosition: Int(pointEndFrame),
            speaker: 0
        ))
        uniqueIdCounter += 1

        minAmplitude = min(minAmplitude, localMinAmplitude)
        maxAmplitude = max(maxAmplitude, localMaxAmplitude)
    }

    let extractionTimeMs = Float((CACurrentMediaTime() - extractionStart) * 1000)

    NSLog("%@", """
    [AudioProcessor] Preview extraction completed:
    - dataPoints generated: \(dataPoints.count)
    - extractionTimeMs: \(String(format: "%.2f", extractionTimeMs))ms
    - amplitudeRange: (min: \(String(format: "%.6f", minAmplitude)), max: \(String(format: "%.6f", maxAmplitude)))
    """)

    return AudioAnalysisData(
        pointsPerSecond: numberOfPoints,
        durationMs: Float(durationMs),
        bitDepth: bitDepth,
        numberOfChannels: numberOfChannels,
        sampleRate: sampleRate,
        samples: samplesInRange,
        dataPoints: dataPoints,
        amplitudeRange: (min: minAmplitude, max: maxAmplitude),
        speakerChanges: [],
        extractionTimeMs: extractionTimeMs
    )
}
|
|
319
709
|
}
|
|
@@ -316,6 +316,106 @@ public class ExpoAudioStreamModule: Module, AudioStreamManagerDelegate {
|
|
|
316
316
|
promise.reject("UNKNOWN_ERROR", "Unknown permission status")
|
|
317
317
|
}
|
|
318
318
|
}
|
|
319
|
+
|
|
320
|
+
/// Analyses an audio file on a background queue and settles the promise with the result.
/// - Parameters:
///   - options: A dictionary containing:
///     - `fileUri`: The URI of the audio file (required).
///     - `startTimeMs`: Optional start time in milliseconds.
///     - `endTimeMs`: Optional end time in milliseconds.
///     - `pointsPerSecond`: Points-per-second setting for the analysis (defaults to 20).
///     - `algorithm`: The amplitude algorithm (defaults to `"rms"`).
///     - `featureOptions`: Features to extract (defaults to none).
// NOTE(review): this handler calls `processAudioData` and reads `pointsPerSecond`; it does not
// call `AudioProcessor.extractPreview(numberOfPoints:...)` — confirm this is intended.
AsyncFunction("extractPreview") { (options: [String: Any], promise: Promise) in
    guard let rawUri = options["fileUri"] as? String, let fileURL = URL(string: rawUri) else {
        promise.reject("INVALID_ARGUMENTS", "Invalid file URI provided")
        return
    }

    let rangeStartMs = options["startTimeMs"] as? Double
    let rangeEndMs = options["endTimeMs"] as? Double
    let requestedPointsPerSecond = options["pointsPerSecond"] as? Int ?? 20
    let amplitudeAlgorithm = options["algorithm"] as? String ?? "rms"
    let requestedFeatures = options["featureOptions"] as? [String: Bool] ?? [:]

    DispatchQueue.global().async {
        // Build the processor first; a failure here is reported as an initialization error.
        let processor: AudioProcessor
        do {
            processor = try AudioProcessor(
                url: fileURL,
                resolve: { value in
                    promise.resolve(value)
                },
                reject: { code, message in
                    promise.reject(code, message)
                }
            )
        } catch {
            promise.reject("PROCESSING_ERROR", "Failed to initialize audio processor: \(error.localizedDescription)")
            return
        }

        guard let analysis = processor.processAudioData(
            startTimeMs: rangeStartMs,
            endTimeMs: rangeEndMs,
            pointsPerSecond: requestedPointsPerSecond,
            algorithm: amplitudeAlgorithm,
            featureOptions: requestedFeatures
        ) else {
            promise.reject("PROCESSING_ERROR", "Failed to process audio data")
            return
        }

        promise.resolve(analysis.toDictionary())
    }
}
|
|
370
|
+
|
|
371
|
+
/// Trims an audio file on a background queue and settles the promise with the new file's details.
/// - Parameters:
///   - options: A dictionary containing:
///     - `fileUri`: The URI of the audio file (required).
///     - `startTimeMs`: Start time in milliseconds (required).
///     - `endTimeMs`: End time in milliseconds (required).
///     - `outputFormat`: Optional output format configuration.
/// Resolves with a dictionary holding `uri`, `duration` and `size`.
AsyncFunction("trimAudio") { (options: [String: Any], promise: Promise) in
    // All three required values must be present with the expected types.
    guard let rawUri = options["fileUri"] as? String,
          let rangeStartMs = options["startTimeMs"] as? Double,
          let rangeEndMs = options["endTimeMs"] as? Double,
          let fileURL = URL(string: rawUri) else {
        promise.reject("INVALID_ARGUMENTS", "Invalid arguments provided")
        return
    }

    let requestedFormat = options["outputFormat"] as? [String: Any]

    DispatchQueue.global().async {
        // Build the processor first; a failure here is reported as an initialization error.
        let processor: AudioProcessor
        do {
            processor = try AudioProcessor(
                url: fileURL,
                resolve: { value in
                    promise.resolve(value)
                },
                reject: { code, message in
                    promise.reject(code, message)
                }
            )
        } catch {
            promise.reject("PROCESSING_ERROR", "Failed to initialize audio processor: \(error.localizedDescription)")
            return
        }

        guard let trimmed = processor.trimAudio(
            startTimeMs: rangeStartMs,
            endTimeMs: rangeEndMs,
            outputFormat: requestedFormat
        ) else {
            promise.reject("TRIM_ERROR", "Failed to trim audio")
            return
        }

        promise.resolve([
            "uri": trimmed.uri,
            "duration": trimmed.duration,
            "size": trimmed.size
        ])
    }
}
|
|
319
419
|
}
|
|
320
420
|
|
|
321
421
|
func audioStreamManager(_ manager: AudioStreamManager, didPauseRecording pauseTime: Date) {
|
package/ios/Features.swift
CHANGED
|
@@ -21,6 +21,36 @@ public struct Features {
|
|
|
21
21
|
var chromagram: [Float]?
|
|
22
22
|
var tempo: Float?
|
|
23
23
|
var hnr: Float?
|
|
24
|
+
|
|
25
|
+
/// Creates a feature set in which every value can be omitted.
///
/// Scalar features default to `0`, `mfcc` defaults to an empty array, and the
/// optional features (`spectralRollOff`, `spectralBandwidth`, `chromagram`,
/// `tempo`, `hnr`) default to `nil`.
init(
    energy: Float = 0,
    mfcc: [Float] = [],
    rms: Float = 0,
    minAmplitude: Float = 0,
    maxAmplitude: Float = 0,
    zcr: Float = 0,
    spectralCentroid: Float = 0,
    spectralFlatness: Float = 0,
    spectralRollOff: Float? = nil,
    spectralBandwidth: Float? = nil,
    chromagram: [Float]? = nil,
    tempo: Float? = nil,
    hnr: Float? = nil
) {
    // Optional features
    self.hnr = hnr
    self.tempo = tempo
    self.chromagram = chromagram
    self.spectralBandwidth = spectralBandwidth
    self.spectralRollOff = spectralRollOff

    // Spectral and coefficient features
    self.spectralFlatness = spectralFlatness
    self.spectralCentroid = spectralCentroid
    self.mfcc = mfcc

    // Time-domain features
    self.zcr = zcr
    self.maxAmplitude = maxAmplitude
    self.minAmplitude = minAmplitude
    self.rms = rms
    self.energy = energy
}
|
|
24
54
|
}
|
|
25
55
|
|
|
26
56
|
extension Features {
|
package/package.json
CHANGED
package/plugin/build/index.d.ts
CHANGED
|
@@ -13,7 +13,6 @@ interface AudioStreamPluginOptions {
|
|
|
13
13
|
iosConfig?: {
|
|
14
14
|
allowBackgroundAudioControls?: boolean;
|
|
15
15
|
backgroundProcessingTitle?: string;
|
|
16
|
-
keepAliveInBackground?: boolean;
|
|
17
16
|
};
|
|
18
17
|
}
|
|
19
18
|
declare const withRecordingPermission: ConfigPlugin<AudioStreamPluginOptions>;
|
package/plugin/build/index.js
CHANGED
|
@@ -21,11 +21,6 @@ const withRecordingPermission = (config, props) => {
|
|
|
21
21
|
useLocation: false,
|
|
22
22
|
useExternalAccessory: false,
|
|
23
23
|
},
|
|
24
|
-
iosConfig: {
|
|
25
|
-
allowBackgroundAudioControls: false,
|
|
26
|
-
backgroundProcessingTitle: 'Audio Recording',
|
|
27
|
-
keepAliveInBackground: true,
|
|
28
|
-
},
|
|
29
24
|
...(props || {}),
|
|
30
25
|
};
|
|
31
26
|
const { enablePhoneStateHandling, enableNotifications, enableBackgroundAudio, } = options;
|
package/plugin/src/index.ts
CHANGED
|
@@ -30,7 +30,6 @@ interface AudioStreamPluginOptions {
|
|
|
30
30
|
iosConfig?: {
|
|
31
31
|
allowBackgroundAudioControls?: boolean
|
|
32
32
|
backgroundProcessingTitle?: string
|
|
33
|
-
keepAliveInBackground?: boolean
|
|
34
33
|
}
|
|
35
34
|
}
|
|
36
35
|
|
|
@@ -49,11 +48,6 @@ const withRecordingPermission: ConfigPlugin<AudioStreamPluginOptions> = (
|
|
|
49
48
|
useLocation: false,
|
|
50
49
|
useExternalAccessory: false,
|
|
51
50
|
},
|
|
52
|
-
iosConfig: {
|
|
53
|
-
allowBackgroundAudioControls: false,
|
|
54
|
-
backgroundProcessingTitle: 'Audio Recording',
|
|
55
|
-
keepAliveInBackground: true,
|
|
56
|
-
},
|
|
57
51
|
...(props || {}),
|
|
58
52
|
}
|
|
59
53
|
|
|
@@ -1,5 +1,15 @@
|
|
|
1
1
|
// packages/expo-audio-stream/src/AudioAnalysis/AudioAnalysis.types.ts
|
|
2
2
|
|
|
3
|
+
/**
 * Settings that control how audio data is decoded before it is analysed.
 * Every field is optional.
 */
export interface DecodingConfig {
    /** Sample rate to decode to; leave undefined to keep the original. */
    targetSampleRate?: number
    /** Channel count to decode to; leave undefined to keep the original. */
    targetChannels?: number
    /** Bit depth to decode to. */
    targetBitDepth?: number
    /** Whether the decoded audio should be normalized. */
    normalizeAudio?: boolean
}
|
|
12
|
+
|
|
3
13
|
/**
|
|
4
14
|
* Represents various audio features extracted from an audio signal.
|
|
5
15
|
*/
|
|
@@ -81,3 +91,59 @@ export interface AudioAnalysis {
|
|
|
81
91
|
speaker: number // Speaker identifier.
|
|
82
92
|
}[]
|
|
83
93
|
}
|
|
94
|
+
|
|
95
|
+
/**
 * Selects a time window inside an audio file.
 * Both bounds are optional.
 */
export interface AudioRangeOptions {
    /** Where the window begins, in milliseconds */
    startTime?: number
    /** Where the window ends, in milliseconds */
    endTime?: number
}
|
|
104
|
+
|
|
105
|
+
/**
 * Options for building a quick waveform preview of an audio file.
 * Intended for UI rendering, where a fixed number of points is wanted.
 * Inherits the optional `startTime` / `endTime` window from `AudioRangeOptions`.
 */
export interface PreviewOptions extends AudioRangeOptions {
    /** URI of the audio file to analyze */
    fileUri: string
    /**
     * How many points the preview should contain in total.
     * @default 100
     */
    numberOfPoints?: number
    /**
     * How each point's amplitude is calculated.
     * @default "rms"
     */
    algorithm?: AmplitudeAlgorithm
    /**
     * Optional decoding configuration for the audio file.
     * When omitted, the defaults are:
     * - targetSampleRate: undefined (keep original)
     * - targetChannels: undefined (keep original)
     * - targetBitDepth: 16
     * - normalizeAudio: false
     */
    decodingOptions?: DecodingConfig
}
|
|
132
|
+
|
|
133
|
+
/**
 * A lightweight waveform summary of an audio file,
 * meant for fast visualization.
 */
export interface AudioPreview {
    /** How many data points cover one second of audio */
    pointsPerSecond: number
    /** Length of the audio in milliseconds */
    durationMs: number
    /** Smallest and largest amplitude found in the preview */
    amplitudeRange: {
        min: number
        max: number
    }
    /** The waveform itself, one entry per data point */
    dataPoints: DataPoint[]
}
|