@siteed/expo-audio-stream 2.0.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +17 -1
- package/README.md +202 -1
- package/android/src/main/java/net/siteed/audiostream/AudioProcessor.kt +300 -1
- package/android/src/main/java/net/siteed/audiostream/AudioRecordingService.kt +16 -2
- package/android/src/main/java/net/siteed/audiostream/AudioTrimmer.kt +1099 -0
- package/android/src/main/java/net/siteed/audiostream/Constants.kt +1 -0
- package/android/src/main/java/net/siteed/audiostream/ExpoAudioStreamModule.kt +274 -44
- package/build/AudioAnalysis/AudioAnalysis.types.d.ts +35 -0
- package/build/AudioAnalysis/AudioAnalysis.types.d.ts.map +1 -1
- package/build/AudioAnalysis/AudioAnalysis.types.js.map +1 -1
- package/build/AudioAnalysis/extractAudioAnalysis.d.ts +2 -12
- package/build/AudioAnalysis/extractAudioAnalysis.d.ts.map +1 -1
- package/build/AudioAnalysis/extractAudioAnalysis.js +0 -26
- package/build/AudioAnalysis/extractAudioAnalysis.js.map +1 -1
- package/build/AudioAnalysis/extractAudioData.d.ts +3 -0
- package/build/AudioAnalysis/extractAudioData.d.ts.map +1 -0
- package/build/AudioAnalysis/extractAudioData.js +5 -0
- package/build/AudioAnalysis/extractAudioData.js.map +1 -0
- package/build/AudioAnalysis/extractMelSpectrogram.d.ts +14 -0
- package/build/AudioAnalysis/extractMelSpectrogram.d.ts.map +1 -0
- package/build/AudioAnalysis/extractMelSpectrogram.js +85 -0
- package/build/AudioAnalysis/extractMelSpectrogram.js.map +1 -0
- package/build/AudioAnalysis/extractPreview.d.ts +11 -0
- package/build/AudioAnalysis/extractPreview.d.ts.map +1 -0
- package/build/AudioAnalysis/extractPreview.js +25 -0
- package/build/AudioAnalysis/extractPreview.js.map +1 -0
- package/build/ExpoAudioStream.types.d.ts +329 -3
- package/build/ExpoAudioStream.types.d.ts.map +1 -1
- package/build/ExpoAudioStream.types.js.map +1 -1
- package/build/ExpoAudioStreamModule.d.ts.map +1 -1
- package/build/ExpoAudioStreamModule.js +455 -1
- package/build/ExpoAudioStreamModule.js.map +1 -1
- package/build/WebRecorder.web.js +2 -2
- package/build/WebRecorder.web.js.map +1 -1
- package/build/index.d.ts +6 -3
- package/build/index.d.ts.map +1 -1
- package/build/index.js +6 -2
- package/build/index.js.map +1 -1
- package/build/trimAudio.d.ts +25 -0
- package/build/trimAudio.d.ts.map +1 -0
- package/build/trimAudio.js +67 -0
- package/build/trimAudio.js.map +1 -0
- package/ios/AudioProcessor.swift +536 -81
- package/ios/ExpoAudioStreamModule.swift +125 -18
- package/package.json +1 -1
- package/plugin/build/index.js +6 -1
- package/plugin/src/index.ts +9 -1
- package/src/AudioAnalysis/AudioAnalysis.types.ts +38 -1
- package/src/AudioAnalysis/extractAudioAnalysis.ts +1 -38
- package/src/AudioAnalysis/extractAudioData.ts +6 -0
- package/src/AudioAnalysis/extractMelSpectrogram.ts +144 -0
- package/src/AudioAnalysis/extractPreview.ts +34 -0
- package/src/ExpoAudioStream.types.ts +354 -42
- package/src/ExpoAudioStreamModule.ts +682 -1
- package/src/WebRecorder.web.ts +2 -2
- package/src/index.ts +7 -8
- package/src/trimAudio.ts +90 -0
|
@@ -313,46 +313,136 @@ public class ExpoAudioStreamModule: Module, AudioStreamManagerDelegate {
|
|
|
313
313
|
/// - Parameters:
|
|
314
314
|
/// - options: A dictionary containing:
|
|
315
315
|
/// - `fileUri`: The URI of the audio file.
|
|
316
|
-
/// - `
|
|
317
|
-
/// - `
|
|
316
|
+
/// - `mode`: Trim mode ('single', 'keep', or 'remove').
|
|
317
|
+
/// - `startTimeMs`: Start time in milliseconds (for 'single' mode).
|
|
318
|
+
/// - `endTimeMs`: End time in milliseconds (for 'single' mode).
|
|
319
|
+
/// - `ranges`: Array of time ranges (for 'keep' and 'remove' modes).
|
|
320
|
+
/// - `outputFileName`: Optional name for the output file.
|
|
318
321
|
/// - `outputFormat`: Optional output format configuration.
|
|
322
|
+
/// - `decodingOptions`: Optional decoding configuration.
|
|
319
323
|
AsyncFunction("trimAudio") { (options: [String: Any], promise: Promise) in
|
|
320
324
|
guard let fileUri = options["fileUri"] as? String,
|
|
321
|
-
let startTimeMs = options["startTimeMs"] as? Double,
|
|
322
|
-
let endTimeMs = options["endTimeMs"] as? Double,
|
|
323
325
|
let url = URL(string: fileUri) else {
|
|
324
|
-
promise.reject("INVALID_ARGUMENTS", "Invalid
|
|
326
|
+
promise.reject("INVALID_ARGUMENTS", "Invalid file URI provided")
|
|
325
327
|
return
|
|
326
328
|
}
|
|
327
329
|
|
|
330
|
+
let mode = options["mode"] as? String ?? "single"
|
|
331
|
+
let startTimeMs = options["startTimeMs"] as? Double
|
|
332
|
+
let endTimeMs = options["endTimeMs"] as? Double
|
|
333
|
+
let ranges = options["ranges"] as? [[String: Double]]
|
|
334
|
+
let outputFileName = options["outputFileName"] as? String
|
|
328
335
|
let outputFormat = options["outputFormat"] as? [String: Any]
|
|
336
|
+
let decodingOptions = options["decodingOptions"] as? [String: Any]
|
|
337
|
+
|
|
338
|
+
// Add detailed logging for filename and format options
|
|
339
|
+
Logger.debug("Trim audio request:")
|
|
340
|
+
Logger.debug("- Input file: \(fileUri)")
|
|
341
|
+
Logger.debug("- Mode: \(mode)")
|
|
342
|
+
Logger.debug("- Output filename: \(outputFileName ?? "not specified (will generate UUID)")")
|
|
343
|
+
if let format = outputFormat?["format"] as? String {
|
|
344
|
+
Logger.debug("- Output format: \(format)")
|
|
345
|
+
} else {
|
|
346
|
+
Logger.debug("- Output format: not specified (will use default)")
|
|
347
|
+
}
|
|
348
|
+
|
|
349
|
+
// Input validation based on mode
|
|
350
|
+
switch mode {
|
|
351
|
+
case "single":
|
|
352
|
+
guard let start = startTimeMs, let end = endTimeMs else {
|
|
353
|
+
promise.reject("INVALID_ARGUMENTS", "startTimeMs and endTimeMs required for 'single' mode")
|
|
354
|
+
return
|
|
355
|
+
}
|
|
356
|
+
guard start >= 0, end > start else {
|
|
357
|
+
promise.reject("INVALID_ARGUMENTS", "Invalid time range")
|
|
358
|
+
return
|
|
359
|
+
}
|
|
360
|
+
case "keep", "remove":
|
|
361
|
+
guard let rangesArray = ranges, !rangesArray.isEmpty else {
|
|
362
|
+
promise.reject("INVALID_ARGUMENTS", "'ranges' array required for 'keep' or 'remove' mode")
|
|
363
|
+
return
|
|
364
|
+
}
|
|
365
|
+
default:
|
|
366
|
+
promise.reject("INVALID_MODE", "Mode must be 'single', 'keep', or 'remove'")
|
|
367
|
+
return
|
|
368
|
+
}
|
|
329
369
|
|
|
330
370
|
DispatchQueue.global().async {
|
|
331
371
|
do {
|
|
332
372
|
let audioProcessor = try AudioProcessor(
|
|
333
373
|
url: url,
|
|
334
|
-
resolve: { result in
|
|
335
|
-
|
|
336
|
-
},
|
|
337
|
-
reject: { code, message in
|
|
338
|
-
promise.reject(code, message)
|
|
339
|
-
}
|
|
374
|
+
resolve: { result in promise.resolve(result) },
|
|
375
|
+
reject: { code, message in promise.reject(code, message) }
|
|
340
376
|
)
|
|
341
|
-
|
|
377
|
+
|
|
378
|
+
let progressCallback: (Float, Int64, Int64) -> Void = { progress, bytesProcessed, totalBytes in
|
|
379
|
+
self.sendEvent("TrimProgress", [
|
|
380
|
+
"progress": progress,
|
|
381
|
+
"bytesProcessed": bytesProcessed,
|
|
382
|
+
"totalBytes": totalBytes
|
|
383
|
+
])
|
|
384
|
+
}
|
|
385
|
+
|
|
386
|
+
let startTime = CACurrentMediaTime()
|
|
342
387
|
if let result = audioProcessor.trimAudio(
|
|
388
|
+
mode: mode,
|
|
343
389
|
startTimeMs: startTimeMs,
|
|
344
390
|
endTimeMs: endTimeMs,
|
|
345
|
-
|
|
391
|
+
ranges: ranges,
|
|
392
|
+
outputFileName: outputFileName,
|
|
393
|
+
outputFormat: outputFormat,
|
|
394
|
+
decodingOptions: decodingOptions,
|
|
395
|
+
progressCallback: progressCallback
|
|
346
396
|
) {
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
397
|
+
let processingTimeMs = Int((CACurrentMediaTime() - startTime) * 1000)
|
|
398
|
+
var resultDict = result.toDictionary()
|
|
399
|
+
resultDict["processingInfo"] = ["durationMs": processingTimeMs]
|
|
400
|
+
|
|
401
|
+
let uri = result.uri
|
|
402
|
+
Logger.debug("Trim completed successfully in \(processingTimeMs)ms")
|
|
403
|
+
Logger.debug("Output file URI: \(uri)")
|
|
404
|
+
|
|
405
|
+
// Verify file exists
|
|
406
|
+
let fileManager = FileManager.default
|
|
407
|
+
if let url = URL(string: uri) {
|
|
408
|
+
let exists = fileManager.fileExists(atPath: url.path)
|
|
409
|
+
Logger.debug("File exists at path \(url.path): \(exists)")
|
|
410
|
+
|
|
411
|
+
// Log filename details
|
|
412
|
+
Logger.debug("Filename: \(url.lastPathComponent)")
|
|
413
|
+
Logger.debug("File extension: \(url.pathExtension.lowercased())")
|
|
414
|
+
|
|
415
|
+
// If format is AAC, ensure we're using the correct extension and MIME type
|
|
416
|
+
if let format = outputFormat?["format"] as? String,
|
|
417
|
+
format.lowercased() == "aac" {
|
|
418
|
+
|
|
419
|
+
Logger.debug("AAC format detected - ensuring correct metadata")
|
|
420
|
+
|
|
421
|
+
// For AAC format, ensure we're using the correct extension and MIME type
|
|
422
|
+
if url.pathExtension.lowercased() == "m4a" {
|
|
423
|
+
Logger.debug("File has correct m4a extension for AAC audio")
|
|
424
|
+
|
|
425
|
+
// Just update the MIME type in the result to ensure correct playback
|
|
426
|
+
if var compression = resultDict["compression"] as? [String: Any] {
|
|
427
|
+
compression["mimeType"] = "audio/mp4"
|
|
428
|
+
resultDict["compression"] = compression
|
|
429
|
+
}
|
|
430
|
+
|
|
431
|
+
resultDict["mimeType"] = "audio/mp4"
|
|
432
|
+
resultDict["actualFormat"] = "m4a"
|
|
433
|
+
} else {
|
|
434
|
+
Logger.debug("Warning: AAC format should use .m4a extension, but found .\(url.pathExtension.lowercased())")
|
|
435
|
+
}
|
|
436
|
+
}
|
|
437
|
+
}
|
|
438
|
+
|
|
439
|
+
promise.resolve(resultDict)
|
|
352
440
|
} else {
|
|
441
|
+
Logger.debug("Failed to trim audio")
|
|
353
442
|
promise.reject("TRIM_ERROR", "Failed to trim audio")
|
|
354
443
|
}
|
|
355
444
|
} catch {
|
|
445
|
+
Logger.debug("Failed to initialize audio processor: \(error.localizedDescription)")
|
|
356
446
|
promise.reject("PROCESSING_ERROR", "Failed to initialize audio processor: \(error.localizedDescription)")
|
|
357
447
|
}
|
|
358
448
|
}
|
|
@@ -493,6 +583,23 @@ public class ExpoAudioStreamModule: Module, AudioStreamManagerDelegate {
|
|
|
493
583
|
promise.reject("PROCESSING_ERROR", "Failed to process audio file: \(error.localizedDescription)")
|
|
494
584
|
}
|
|
495
585
|
}
|
|
586
|
+
|
|
587
|
+
/// Placeholder for mel spectrogram extraction; not implemented on iOS yet.
///
/// The call always rejects with `UNSUPPORTED_PLATFORM`. None of the keys in
/// `options` are read here, so no option (file URI, window size, ...) has any effect.
///
/// - Parameters:
///   - options: Extraction options sent from JavaScript (currently ignored).
///   - promise: Always rejected with code `UNSUPPORTED_PLATFORM`; it is never resolved.
AsyncFunction("extractMelSpectrogram") { (options: [String: Any], promise: Promise) in
    // This is a placeholder implementation that will be fully implemented later.
    // Currently, mel spectrogram extraction is only available on Android.
    promise.reject(
        "UNSUPPORTED_PLATFORM",
        "Mel spectrogram extraction is currently only available on Android and is experimental"
    )
}
|
|
496
603
|
}
|
|
497
604
|
|
|
498
605
|
func audioStreamManager(_ manager: AudioStreamManager, didReceiveInterruption info: [String: Any]) {
|
package/package.json
CHANGED
package/plugin/build/index.js
CHANGED
|
@@ -48,7 +48,12 @@ const withRecordingPermission = (config, props) => {
|
|
|
48
48
|
if (options.iosBackgroundModes?.useAudio === true &&
|
|
49
49
|
enableBackgroundAudio === true &&
|
|
50
50
|
!existingBackgroundModes.includes('audio')) {
|
|
51
|
-
|
|
51
|
+
// Don't automatically add 'audio' background mode as it's only for playback
|
|
52
|
+
// existingBackgroundModes.push('audio')
|
|
53
|
+
// Instead, ensure processing mode is used for background recording
|
|
54
|
+
if (options.iosBackgroundModes?.useProcessing !== true) {
|
|
55
|
+
console.warn(`${LOG_PREFIX} Warning: Background audio recording requires 'processing' background mode. Please enable 'useProcessing' in iosBackgroundModes.`);
|
|
56
|
+
}
|
|
52
57
|
}
|
|
53
58
|
if (options.iosBackgroundModes?.useVoIP === true &&
|
|
54
59
|
enablePhoneStateHandling === true) {
|
package/plugin/src/index.ts
CHANGED
|
@@ -90,7 +90,15 @@ const withRecordingPermission: ConfigPlugin<AudioStreamPluginOptions> = (
|
|
|
90
90
|
enableBackgroundAudio === true &&
|
|
91
91
|
!existingBackgroundModes.includes('audio')
|
|
92
92
|
) {
|
|
93
|
-
|
|
93
|
+
// Don't automatically add 'audio' background mode as it's only for playback
|
|
94
|
+
// existingBackgroundModes.push('audio')
|
|
95
|
+
|
|
96
|
+
// Instead, ensure processing mode is used for background recording
|
|
97
|
+
if (options.iosBackgroundModes?.useProcessing !== true) {
|
|
98
|
+
console.warn(
|
|
99
|
+
`${LOG_PREFIX} Warning: Background audio recording requires 'processing' background mode. Please enable 'useProcessing' in iosBackgroundModes.`
|
|
100
|
+
)
|
|
101
|
+
}
|
|
94
102
|
}
|
|
95
103
|
|
|
96
104
|
if (
|
|
@@ -82,7 +82,7 @@ export interface DataPoint {
|
|
|
82
82
|
id: number
|
|
83
83
|
amplitude: number // Peak amplitude for the segment
|
|
84
84
|
rms: number // Root mean square value
|
|
85
|
-
dB: number //
|
|
85
|
+
dB: number // dBFS (decibels relative to full scale) computed from RMS value
|
|
86
86
|
silent: boolean // Always computed
|
|
87
87
|
features?: AudioFeatures
|
|
88
88
|
speech?: SpeechFeatures
|
|
@@ -163,3 +163,40 @@ export interface PreviewOptions extends AudioRangeOptions {
|
|
|
163
163
|
*/
|
|
164
164
|
decodingOptions?: DecodingConfig
|
|
165
165
|
}
|
|
166
|
+
|
|
167
|
+
/**
 * Options for mel-spectrogram extraction
 *
 * @experimental This feature is experimental and currently only available on Android.
 * The API may change in future versions.
 */
export interface ExtractMelSpectrogramOptions {
    /** Path to audio file */
    fileUri?: string
    /** Raw audio buffer */
    arrayBuffer?: ArrayBuffer
    /** Window size in ms (e.g., 25) */
    windowSizeMs: number
    /** Hop length in ms (e.g., 10) */
    hopLengthMs: number
    /** Number of mel filters (e.g., 60) */
    nMels: number
    /** Min frequency (default: 0) */
    fMin?: number
    /** Max frequency (default: sampleRate / 2) */
    fMax?: number
    /** Window function (default: 'hann') */
    windowType?: 'hann' | 'hamming'
    /** Mean normalization (default: false) */
    normalize?: boolean
    /** Log scaling of mel energies (default: true) */
    logScale?: boolean
    /** Audio decoding settings */
    decodingOptions?: DecodingConfig
    /** Optional start time */
    startTimeMs?: number
    /** Optional end time */
    endTimeMs?: number
    /** Optional logger used to report extraction errors */
    logger?: ConsoleLike
}
|
|
189
|
+
|
|
190
|
+
/**
 * Return type for mel spectrogram extraction
 *
 * @experimental This feature is experimental and currently only available on Android.
 * The API may change in future versions.
 */
export interface MelSpectrogram {
    /** 2D array [time][mel] */
    spectrogram: number[][]
    /** Audio sample rate */
    sampleRate: number
    /** Number of mel filters */
    nMels: number
    /** Number of time frames */
    timeSteps: number
    /** Audio duration in ms */
    durationMs: number
}
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
*/
|
|
8
8
|
import crc32 from 'crc-32'
|
|
9
9
|
|
|
10
|
-
import { ConsoleLike
|
|
10
|
+
import { ConsoleLike } from '../ExpoAudioStream.types'
|
|
11
11
|
import ExpoAudioStreamModule from '../ExpoAudioStreamModule'
|
|
12
12
|
import { isWeb } from '../constants'
|
|
13
13
|
import {
|
|
@@ -15,7 +15,6 @@ import {
|
|
|
15
15
|
AudioFeaturesOptions,
|
|
16
16
|
DataPoint,
|
|
17
17
|
DecodingConfig,
|
|
18
|
-
PreviewOptions,
|
|
19
18
|
} from './AudioAnalysis.types'
|
|
20
19
|
import { processAudioBuffer } from '../utils/audioProcessing'
|
|
21
20
|
import { convertPCMToFloat32 } from '../utils/convertPCMToFloat32'
|
|
@@ -332,39 +331,3 @@ export const extractRawWavAnalysis = async ({
|
|
|
332
331
|
return res
|
|
333
332
|
}
|
|
334
333
|
}
|
|
335
|
-
|
|
336
|
-
/**
|
|
337
|
-
* Generates a simplified preview of the audio waveform for quick visualization.
|
|
338
|
-
* Ideal for UI rendering with a specified number of points.
|
|
339
|
-
*
|
|
340
|
-
* @param options - The options for the preview, including file URI and time range.
|
|
341
|
-
* @returns A promise that resolves to the audio preview data.
|
|
342
|
-
*/
|
|
343
|
-
export async function extractPreview({
|
|
344
|
-
fileUri,
|
|
345
|
-
numberOfPoints = 100,
|
|
346
|
-
startTimeMs = 0,
|
|
347
|
-
endTimeMs = 30000, // First 30 seconds
|
|
348
|
-
decodingOptions,
|
|
349
|
-
logger,
|
|
350
|
-
}: PreviewOptions): Promise<AudioAnalysis> {
|
|
351
|
-
const durationMs = endTimeMs - startTimeMs
|
|
352
|
-
const segmentDurationMs = Math.floor(durationMs / numberOfPoints)
|
|
353
|
-
|
|
354
|
-
// Call extractAudioAnalysis with calculated parameters
|
|
355
|
-
const analysis = await extractAudioAnalysis({
|
|
356
|
-
fileUri,
|
|
357
|
-
startTimeMs,
|
|
358
|
-
endTimeMs,
|
|
359
|
-
logger,
|
|
360
|
-
segmentDurationMs,
|
|
361
|
-
decodingOptions,
|
|
362
|
-
})
|
|
363
|
-
|
|
364
|
-
// Transform the result into AudioPreview format
|
|
365
|
-
return analysis
|
|
366
|
-
}
|
|
367
|
-
|
|
368
|
-
export const extractAudioData = async (props: ExtractAudioDataOptions) => {
|
|
369
|
-
return await ExpoAudioStreamModule.extractAudioData(props)
|
|
370
|
-
}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import { ExtractAudioDataOptions } from '../ExpoAudioStream.types'
|
|
2
|
+
import ExpoAudioStreamModule from '../ExpoAudioStreamModule'
|
|
3
|
+
|
|
4
|
+
/**
 * Forwards an audio-data extraction request to `ExpoAudioStreamModule`.
 *
 * @param props - Extraction options, passed through unchanged.
 * @returns Whatever `ExpoAudioStreamModule.extractAudioData` resolves with.
 */
export const extractAudioData = async (props: ExtractAudioDataOptions) => {
    const extracted = await ExpoAudioStreamModule.extractAudioData(props)
    return extracted
}
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @experimental This feature is experimental and currently only available on Android.
|
|
3
|
+
* The API may change in future versions. The web implementation is a placeholder.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { ExpoAudioStreamModule } from '..'
|
|
7
|
+
import { isWeb } from '../constants'
|
|
8
|
+
import {
|
|
9
|
+
ExtractMelSpectrogramOptions,
|
|
10
|
+
MelSpectrogram,
|
|
11
|
+
} from './AudioAnalysis.types'
|
|
12
|
+
import {
|
|
13
|
+
processAudioBuffer,
|
|
14
|
+
ProcessedAudioData,
|
|
15
|
+
} from '../utils/audioProcessing'
|
|
16
|
+
|
|
17
|
+
/**
 * Extracts a mel spectrogram from audio data.
 *
 * On web the audio is decoded with `processAudioBuffer` and passed to the local
 * `computeMelSpectrogram` helper; on every other platform the options are
 * forwarded unchanged to `ExpoAudioStreamModule.extractMelSpectrogram`.
 *
 * @experimental This feature is experimental and currently only available on Android.
 * The iOS implementation will throw an "UNSUPPORTED_PLATFORM" error.
 * The web implementation is a placeholder that returns dummy data.
 *
 * @param options - Audio source (`fileUri` or `arrayBuffer`) plus framing and mel settings.
 * @returns The spectrogram together with its sample rate, mel count, frame count and duration.
 * @throws RangeError on web when `windowSizeMs` or `hopLengthMs` is not a positive
 * number, or when either rounds down to less than one sample at the decoded sample rate.
 */
export async function extractMelSpectrogram(
    options: ExtractMelSpectrogramOptions
): Promise<MelSpectrogram> {
    const {
        fileUri,
        arrayBuffer,
        windowSizeMs,
        hopLengthMs,
        nMels,
        fMin = 0,
        fMax,
        windowType = 'hann',
        normalize = false,
        logScale = true,
        decodingOptions,
        startTimeMs,
        endTimeMs,
        logger,
    } = options

    if (isWeb) {
        // Reject unusable framing values before allocating an AudioContext.
        // `!(x > 0)` also catches NaN and undefined.
        if (!(windowSizeMs > 0) || !(hopLengthMs > 0)) {
            const error = new RangeError(
                `windowSizeMs and hopLengthMs must be positive numbers (received ${windowSizeMs} and ${hopLengthMs})`
            )
            logger?.error('Error extracting mel spectrogram:', error)
            throw error
        }

        // Create audio context
        const audioContext = new (window.AudioContext ||
            (window as any).webkitAudioContext)()

        try {
            // Process audio data using the existing utility
            const processedAudio: ProcessedAudioData = await processAudioBuffer(
                {
                    arrayBuffer,
                    fileUri,
                    targetSampleRate:
                        decodingOptions?.targetSampleRate || 16000,
                    targetChannels: decodingOptions?.targetChannels || 1,
                    normalizeAudio: decodingOptions?.normalizeAudio ?? false,
                    startTimeMs,
                    endTimeMs,
                    audioContext,
                    logger,
                }
            )

            // Calculate window and hop size in samples
            const sampleRate = processedAudio.sampleRate
            const windowSize = Math.floor((windowSizeMs * sampleRate) / 1000)
            const hopLength = Math.floor((hopLengthMs * sampleRate) / 1000)
            // A falsy fMax (undefined or 0) falls back to the Nyquist frequency
            const maxFreq = fMax || sampleRate / 2

            // A sub-sample hop would make the frame count infinite and the
            // framing loop would never terminate, so stop here instead.
            if (windowSize < 1 || hopLength < 1) {
                throw new RangeError(
                    `windowSizeMs (${windowSizeMs}) and hopLengthMs (${hopLengthMs}) must each span at least one sample at ${sampleRate} Hz`
                )
            }

            // Extract the mel spectrogram from the processed audio
            const spectrogram = computeMelSpectrogram(
                processedAudio.channelData,
                sampleRate,
                nMels,
                windowSize,
                hopLength,
                fMin,
                maxFreq,
                windowType,
                normalize,
                logScale
            )

            const timeSteps = spectrogram.length

            return {
                spectrogram,
                sampleRate,
                nMels,
                timeSteps,
                durationMs: processedAudio.durationMs,
            }
        } catch (error) {
            logger?.error('Error extracting mel spectrogram:', error)
            throw error
        } finally {
            // Close the audio context whether extraction succeeded or failed
            await audioContext.close()
        }
    }
    return ExpoAudioStreamModule.extractMelSpectrogram(options)
}
|
|
105
|
+
|
|
106
|
+
/**
 * Computes a mel spectrogram from audio data
 *
 * @experimental This is a placeholder implementation that returns dummy data.
 * The actual implementation will be added in a future version. Only
 * `audioData.length`, `nMels`, `windowSize` and `hopLength` influence the
 * result; the remaining parameters are accepted but not used yet.
 *
 * @param audioData - Audio samples to frame.
 * @param sampleRate - Sample rate of `audioData` (unused by the placeholder).
 * @param nMels - Number of values per frame.
 * @param windowSize - Frame length in samples; must be at least 1.
 * @param hopLength - Distance between frame starts in samples; must be at least 1.
 * @param fMin - Lowest mel filter frequency (unused by the placeholder).
 * @param fMax - Highest mel filter frequency (unused by the placeholder).
 * @param windowType - Window function name (unused by the placeholder).
 * @param normalize - Whether to normalize (unused by the placeholder).
 * @param logScale - Whether to log-scale energies (unused by the placeholder).
 * @returns `floor((audioData.length - windowSize) / hopLength) + 1` rows of
 * `nMels` zeros, or an empty array when the audio is shorter than one window.
 * @throws RangeError when `windowSize` or `hopLength` is below 1 or not a number.
 */
function computeMelSpectrogram(
    audioData: Float32Array,
    sampleRate: number,
    nMels: number,
    windowSize: number,
    hopLength: number,
    fMin: number,
    fMax: number,
    windowType: 'hann' | 'hamming',
    normalize: boolean,
    logScale: boolean
): number[][] {
    // Placeholder for the actual implementation
    // This would include:
    // 1. Windowing the audio data using the specified window type
    // 2. Computing the STFT (Short-Time Fourier Transform)
    // 3. Converting to power spectrogram
    // 4. Applying mel filterbanks
    // 5. Taking the logarithm if logScale is true
    // 6. Normalizing if normalize is true

    // A hop of 0 makes the frame count Infinity, which would loop forever.
    // `!(x >= 1)` also rejects NaN.
    if (!(windowSize >= 1) || !(hopLength >= 1)) {
        throw new RangeError(
            `windowSize and hopLength must be at least 1 sample (received ${windowSize} and ${hopLength})`
        )
    }

    // Audio shorter than one window yields no frames rather than a negative count
    const numFrames = Math.max(
        0,
        Math.floor((audioData.length - windowSize) / hopLength) + 1
    )

    // Create dummy mel spectrogram data: one independent zero-filled row per frame
    return Array.from({ length: numFrames }, () =>
        new Array<number>(nMels).fill(0)
    )
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import { PreviewOptions, AudioAnalysis } from './AudioAnalysis.types'
|
|
2
|
+
import { extractAudioAnalysis } from './extractAudioAnalysis'
|
|
3
|
+
|
|
4
|
+
/**
 * Generates a simplified preview of the audio waveform for quick visualization.
 * Ideal for UI rendering with a specified number of points.
 *
 * The requested time range is divided into `numberOfPoints` equal segments and
 * the resulting segment duration is passed to `extractAudioAnalysis`; the
 * analysis is returned as-is.
 *
 * @param options - The options for the preview, including file URI and time range.
 * @returns A promise that resolves to the audio preview data.
 * @throws RangeError when `numberOfPoints` is not a positive number or when
 * `endTimeMs` is not greater than `startTimeMs`.
 */
export async function extractPreview({
    fileUri,
    numberOfPoints = 100,
    startTimeMs = 0,
    endTimeMs = 30000, // First 30 seconds
    decodingOptions,
    logger,
}: PreviewOptions): Promise<AudioAnalysis> {
    // `!(x > 0)` also rejects NaN; 0 points would give an infinite segment duration
    if (!(numberOfPoints > 0)) {
        throw new RangeError(
            `numberOfPoints must be a positive number (received ${numberOfPoints})`
        )
    }

    const durationMs = endTimeMs - startTimeMs
    if (!(durationMs > 0)) {
        throw new RangeError(
            `endTimeMs (${endTimeMs}) must be greater than startTimeMs (${startTimeMs})`
        )
    }

    // Floor to whole milliseconds, but never below 1 ms: a range shorter than
    // `numberOfPoints` ms would otherwise request 0 ms segments.
    // NOTE(review): assumes extractAudioAnalysis expects a positive segmentDurationMs - confirm.
    const segmentDurationMs = Math.max(
        1,
        Math.floor(durationMs / numberOfPoints)
    )

    // Call extractAudioAnalysis with calculated parameters and return its result unchanged
    return extractAudioAnalysis({
        fileUri,
        startTimeMs,
        endTimeMs,
        logger,
        segmentDurationMs,
        decodingOptions,
    })
}
|