flareverb 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
flareverb/__init__.py ADDED
@@ -0,0 +1 @@
1
+ from .config import *
flareverb/analysis.py ADDED
@@ -0,0 +1,678 @@
1
+ import sys
2
+ import numpy as np
3
+ from typing import Union, Optional, Tuple
4
+
5
+ import torch
6
+ import torch.nn.functional as F
7
+ from torch.fft import rfft, irfft
8
+ from scipy.signal import spectrogram
9
+ from scipy.stats import linregress
10
+
11
+ from flareverb.utils import (
12
+ ms_to_samps,
13
+ filterbank,
14
+ discard_last_n_percent)
15
+
16
+ Tensor = torch.Tensor
17
+ NDArray = np.ndarray
18
+
19
+
20
def schroeder_backward_int(
    x: Union[Tensor, NDArray],
    energy_norm: bool = True,
    subtract_noise: bool = False,
    noise_level: float = 0.0,
) -> Tuple[Union[Tensor, NDArray], Union[Tensor, NDArray]]:
    """
    Schroeder backward integration of a squared impulse response.

    Dispatches to the PyTorch implementation when ``x`` is a ``torch.Tensor``
    and to the NumPy implementation for anything else.

    Parameters
    ----------
    x : Union[Tensor, NDArray]
        Input signal (impulse response).
    energy_norm : bool, optional
        If True, normalize the output to its maximum value (default: True).
    subtract_noise : bool, optional
        If True, subtract the squared noise level from the squared signal
        (default: False).
    noise_level : float, optional
        The noise level to subtract if subtract_noise is True (default: 0.0).

    Returns
    -------
    tuple of Union[Tensor, NDArray]
        The backward integrated (and optionally normalized) signal, followed
        by the normalization value(s) that were applied.
    """
    # Pick the backend once, then forward all arguments positionally.
    backend = (
        _schroeder_backward_int_torch
        if isinstance(x, torch.Tensor)
        else _schroeder_backward_int_numpy
    )
    return backend(x, energy_norm, subtract_noise, noise_level)
49
+
50
+
51
+ def _schroeder_backward_int_torch(
52
+ x: Tensor,
53
+ energy_norm: bool,
54
+ subtract_noise: bool,
55
+ noise_level: float,
56
+ ) -> Tuple[Tensor, Tensor]:
57
+ """
58
+ PyTorch implementation of Schroeder backward integration.
59
+
60
+ This function computes the Schroeder backward integration for PyTorch tensors.
61
+ The integration is performed by flipping the signal, computing the cumulative
62
+ sum of squared values, and then flipping back.
63
+
64
+ Parameters
65
+ ----------
66
+ x : Tensor
67
+ Input signal tensor to be integrated.
68
+ energy_norm : bool
69
+ If True, normalize the output to its maximum value.
70
+ subtract_noise : bool
71
+ If True, subtract the squared noise level from the squared signal.
72
+ noise_level : float
73
+ The noise level to subtract if subtract_noise is True.
74
+
75
+ Returns
76
+ -------
77
+ Tuple[Tensor, Tensor]
78
+ A tuple containing:
79
+ - out: The backward integrated and normalized signal
80
+ - norm_vals: The normalization values used (maximum values per channel)
81
+
82
+ Notes
83
+ -----
84
+ - If subtract_noise is True, noise_level^2 is subtracted from the squared signal
85
+ - Normalization is useful for t60 estimation from the EDC
86
+ """
87
+ out = torch.flip(x, dims=[1])
88
+ if subtract_noise:
89
+ out_sqrd = out ** 2 - noise_level ** 2
90
+ else:
91
+ out_sqrd = out ** 2
92
+ out = torch.cumsum(out_sqrd, dim=1)
93
+ out = torch.flip(out, dims=[1])
94
+
95
+ # Normalize to 1
96
+ if energy_norm:
97
+ norm_vals = torch.max(out, dim=1, keepdim=True)[0] # per channel
98
+ else:
99
+ norm_vals = torch.ones_like(out)
100
+
101
+ return out / norm_vals, norm_vals
102
+
103
+
104
+ def _schroeder_backward_int_numpy(
105
+ x: NDArray,
106
+ energy_norm: bool,
107
+ subtract_noise: bool,
108
+ noise_level: float,
109
+ ) -> Tuple[NDArray, NDArray]:
110
+ """
111
+ NumPy implementation of Schroeder backward integration.
112
+
113
+ This function computes the Schroeder backward integration for NumPy arrays.
114
+ The integration is performed by flipping the signal, computing the cumulative
115
+ sum of squared values, and then flipping back.
116
+
117
+ Parameters
118
+ ----------
119
+ x : NDArray
120
+ Input signal array to be integrated.
121
+ energy_norm : bool
122
+ If True, normalize the output to its maximum value.
123
+ subtract_noise : bool
124
+ If True, subtract the squared noise level from the squared signal.
125
+ noise_level : float
126
+ The noise level to subtract if subtract_noise is True.
127
+
128
+ Returns
129
+ -------
130
+ Tuple[NDArray, NDArray]
131
+ A tuple containing:
132
+ - out: The backward integrated and normalized signal
133
+ - norm_vals: The normalization values used (maximum values per channel)
134
+
135
+ Notes
136
+ -----
137
+ - If subtract_noise is True, noise_level^2 is subtracted from the squared signal
138
+ - Normalization is useful for t60 estimation from the EDC
139
+ """
140
+ out = np.flip(x, axis=1)
141
+ if subtract_noise:
142
+ out_sqrd = out ** 2 - noise_level ** 2
143
+ else:
144
+ out_sqrd = out ** 2
145
+ out = np.cumsum(out_sqrd, axis=1)
146
+ out = np.flip(out, axis=1)
147
+
148
+ # Normalize to 1
149
+ if energy_norm:
150
+ norm_vals = np.max(out, keepdims=True, axis=1) # per channel
151
+ else:
152
+ norm_vals = np.ones_like(out)
153
+
154
+ return out / norm_vals, norm_vals
155
+
156
+
157
def compute_edc(
    x: Union[Tensor, NDArray],
    use_filterbank: bool = False,
    compensate_fbnk_energy: bool = True,
    n_fractions: int = 1,
    f_min: int = 63,
    f_max: int = 16000,
    fs: int = 48000,
    energy_norm: bool = True,
    subtract_noise: bool = False,
    noise_level: float = 0.0,
) -> Union[Tensor, NDArray]:
    """
    Compute the Energy Decay Curve (EDC) in dB from an input signal.

    The tail of the signal is trimmed, the signal is optionally split into
    frequency bands, Schroeder backward integration is applied, and the
    result is converted to decibels.

    Parameters
    ----------
    x : Union[Tensor, NDArray]
        Input signal (room impulse response) to analyze.
    use_filterbank : bool, optional
        If True, pass the signal through ``filterbank`` before integrating.
        Default is False.
    compensate_fbnk_energy : bool, optional
        Forwarded to ``filterbank`` as ``compensate_energy``.
        Only used when use_filterbank is True. Default is True.
    n_fractions : int, optional
        Number of fractions per octave for the filterbank.
        Only used when use_filterbank is True. Default is 1.
    f_min : int, optional
        Minimum filterbank frequency in Hz.
        Only used when use_filterbank is True. Default is 63.
    f_max : int, optional
        Maximum filterbank frequency in Hz.
        Only used when use_filterbank is True. Default is 16000.
    fs : int, optional
        Sampling rate in Hz, forwarded to ``filterbank`` as ``sample_rate``.
        Default is 48000.
    energy_norm : bool, optional
        If True, normalize the integrated energy to its maximum value.
        Default is True.
    subtract_noise : bool, optional
        If True, subtract the squared noise level from the squared signal.
        Default is False.
    noise_level : float, optional
        The noise level to subtract if subtract_noise is True. Default is 0.0.

    Returns
    -------
    Union[Tensor, NDArray]
        The energy decay curve in dB, of the same array family (torch or
        NumPy) as produced by the integration step.

    Notes
    -----
    - ``discard_last_n_percent(x, 0.5)`` trims the end of the signal to
      remove filtering artefacts.
      NOTE(review): the original comment calls this "0.5 permille" while the
      helper name says percent - confirm against the helper.
    - ``1e-32`` is added before the logarithm so zero energy does not
      produce ``-inf``.
    """
    # Remove filtering artefacts at the very end of the signal
    trimmed = discard_last_n_percent(x, 0.5)

    if use_filterbank:
        # Split into frequency bands; the second return value is unused here
        trimmed, _ = filterbank(
            trimmed,
            n_fractions=n_fractions,
            f_min=f_min,
            f_max=f_max,
            sample_rate=fs,
            compensate_energy=compensate_fbnk_energy,
        )

    # Backward integration yields the remaining energy at each sample
    energy, _ = schroeder_backward_int(trimmed, energy_norm, subtract_noise, noise_level)

    # Convert to dB with the log10 matching the array type
    log10 = torch.log10 if isinstance(energy, torch.Tensor) else np.log10
    return 10 * log10(energy + 1e-32)
236
+
237
+
238
def compute_edr(
    x: Union[Tensor, NDArray],
    energy_norm: bool = True,
    subtract_noise: bool = False,
    noise_level: float = 0.0,
) -> Union[Tensor, NDArray]:
    """
    Compute the Energy Decay Relief (EDR) in dB from an input signal using STFT.

    The tail of the signal is trimmed, an STFT magnitude is computed, Schroeder
    backward integration is applied to that magnitude, and the result is
    converted to decibels.

    Parameters
    ----------
    x : Union[Tensor, NDArray]
        Input signal (room impulse response) to analyze.
    energy_norm : bool, optional
        If True, normalize the integrated energy to its maximum value.
        Default is True.
    subtract_noise : bool, optional
        If True, subtract the squared noise level from the squared magnitude.
        Default is False.
    noise_level : float, optional
        The noise level to subtract if subtract_noise is True. Default is 0.0.

    Returns
    -------
    Union[Tensor, NDArray]
        The energy decay relief in dB. In the current implementation the
        result is always a ``torch.Tensor``: NumPy input is converted to a
        tensor after the spectrogram is computed.

    Notes
    -----
    - ``discard_last_n_percent(x, 0.5)`` trims the end of the signal to
      remove filtering artefacts.
    - Tensor input goes through ``_stft_torch``; NumPy input goes through
      ``scipy.signal.spectrogram`` with a 1028-sample segment and 75% overlap
      along axis 1.
    - The backward integration runs along dimension 1 of the magnitude.
      NOTE(review): for the scipy path the segment-time axis is appended
      last, so dimension 1 may not be the time axis - confirm.
    - ``1e-32`` is added before the logarithm, matching ``compute_edc``, so
      bins with zero remaining energy give a large negative finite value
      instead of ``-inf``.
    """
    # Remove filtering artefacts at the very end of the signal
    out = discard_last_n_percent(x, 0.5)

    if isinstance(out, torch.Tensor):
        # PyTorch STFT implementation
        stft_mag = _stft_torch(out)
    else:
        # NumPy STFT using scipy
        _, _, stft_mag = spectrogram(
            out, nperseg=1028, noverlap=int(1028 * 0.75), mode='magnitude', axis=1
        )
        stft_mag = torch.tensor(stft_mag)

    # compute EDRs
    out, _ = schroeder_backward_int(stft_mag, energy_norm, subtract_noise, noise_level)

    # Guard the logarithm against exact zeros (same epsilon as compute_edc)
    return 10 * torch.log10(out + 1e-32)
291
+
292
+
293
+ def _stft_torch(x: Tensor, nperseg: int = 1028, noverlap: int = None) -> Tensor:
294
+ """
295
+ PyTorch implementation of STFT magnitude computation.
296
+
297
+ This function computes the Short-time Fourier transform magnitude using PyTorch.
298
+ It provides a time-frequency representation of the input signal using overlapping
299
+ windows and FFT computation.
300
+
301
+ Parameters
302
+ ----------
303
+ x : Tensor
304
+ Input signal tensor to be analyzed.
305
+ nperseg : int, optional
306
+ Length of each segment (window length) in samples. Default is 1028.
307
+ noverlap : int, optional
308
+ Number of points to overlap between segments. If None, defaults to
309
+ 75% of nperseg. Default is None.
310
+
311
+ Returns
312
+ -------
313
+ Tensor
314
+ STFT magnitude tensor with shape (frequency_bins, time_frames).
315
+ """
316
+ if noverlap is None:
317
+ noverlap = int(nperseg * 0.75)
318
+
319
+ hop_length = nperseg - noverlap
320
+
321
+ # Pad the signal
322
+ pad_length = nperseg // 2
323
+ x_padded = F.pad(x, (0, 0, pad_length, pad_length, 0, 0))
324
+
325
+ # Create windows
326
+ window = torch.hann_window(nperseg, dtype=x.dtype, device=x.device)
327
+
328
+ # Compute STFT
329
+ stft = torch.stft(x_padded.squeeze(), n_fft=nperseg, hop_length=hop_length,
330
+ window=window, return_complex=True, center=False)
331
+
332
+ return torch.abs(stft)
333
+
334
+
335
def estimate_rt60(
    edc_db: Union[Tensor, NDArray],
    time: Union[Tensor, NDArray],
    decay_start_db: float = -5,
    decay_end_db: float = -35
) -> Tuple[float, float, float, Union[Tensor, NDArray]]:
    """
    Estimate the reverberation time (RT60) from an Energy Decay Curve (EDC).

    A straight line is fitted (``scipy.stats.linregress``) to the samples of
    the EDC that lie strictly between ``decay_end_db`` and ``decay_start_db``;
    RT60 is the time that line needs to fall by 60 dB.

    Parameters
    ----------
    edc_db : Union[Tensor, NDArray]
        Energy decay curve in dB.
    time : Union[Tensor, NDArray]
        Time vector in seconds. It is indexed with the squeezed selection
        mask, so it must match the shape of ``edc_db`` once size-1
        dimensions are removed.
    decay_start_db : float, optional
        Upper level of the fit range in dB (exclusive). Default is -5 dB.
    decay_end_db : float, optional
        Lower level of the fit range in dB (exclusive). Default is -35 dB.

    Returns
    -------
    Tuple[float, float, float, Union[Tensor, NDArray]]
        A tuple containing:
        - rt60 : float
            Estimated RT60 in seconds; infinity when no sample falls in the
            fit range or the fitted slope is zero.
        - slope : float
            Slope of the linear fit in dB/s (0.0 when no fit was made).
        - intercept : float
            Y-intercept of the linear fit (0.0 when no fit was made).
        - valid_range : Union[Tensor, NDArray]
            Boolean mask, same shape as ``edc_db``, of the samples used.

    Notes
    -----
    - RT60 is calculated as ``-60 / slope``.
    - The emptiness check uses the ``.any()`` method, which exists on both
      tensors and arrays; ``torch.any`` would reject NumPy input.
    """
    valid_range = (edc_db < decay_start_db) & (edc_db > decay_end_db)

    # Backend-agnostic emptiness check (works for Tensor and ndarray alike)
    if not bool(valid_range.any()):
        return float('inf'), 0.0, 0.0, valid_range

    # Drop size-1 dimensions so the mask can index the time vector
    time_mask = valid_range.squeeze()

    if isinstance(edc_db, torch.Tensor):
        # linregress needs NumPy data; detach so tensors tracking gradients
        # can be converted as well
        time_valid = time[time_mask].detach().cpu().numpy()
        edc_valid = edc_db[valid_range].detach().cpu().numpy()
    else:
        time_valid = time[time_mask]
        edc_valid = edc_db[valid_range]

    slope, intercept, *_ = linregress(time_valid, edc_valid)
    rt60 = -60 / slope if slope != 0 else float('inf')

    return rt60, slope, intercept, valid_range
401
+
402
+
403
def normalized_echo_density(
    rir: Union[Tensor, NDArray],
    fs: float,
    window_length_ms: float = 30,
    use_local_avg: bool = True
) -> Union[Tensor, NDArray]:
    """
    Compute the normalized echo density profile as defined by Abel.

    A Hann-weighted window slides over the time axis of the impulse response.
    At each position the weighted standard deviation of the frame is computed,
    and the window-weighted fraction of samples whose magnitude exceeds it is
    divided by ``erfc(1/sqrt(2))``.

    Parameters
    ----------
    rir : Union[Tensor, NDArray]
        Room impulse response as a 3-D array whose axis 1 is time (the
        padding and framing below operate on axis 1 of three axes). Tensors
        are converted to NumPy.
    fs : float
        Sampling rate in Hz.
    window_length_ms : float, optional
        Length of the analysis window in milliseconds. Default is 30 ms.
    use_local_avg : bool, optional
        If True, the weighted mean of each frame is removed before the
        variance is computed; otherwise the raw second moment is used.
        Default is True.

    Returns
    -------
    Union[Tensor, NDArray]
        1-D NumPy array with one value per input sample along axis 1. When
        axis 0 or axis 2 holds more than one signal, the per-signal values
        are averaged.

    Notes
    -----
    - The window length is made odd so the window is centred on each sample,
      and the signal is zero-padded by half a window on both sides.
    - The window is reshaped to align with the time axis before any
      weighting; multiplying by the flat 1-D window would broadcast it
      against the last axis instead.
    - The loop covers every time sample (``rir.shape[1]``), not ``len(rir)``,
      which is the size of axis 0.
    """
    if isinstance(rir, torch.Tensor):
        # Detach so tensors tracking gradients can be converted as well
        rir = rir.detach().cpu().numpy()

    # erfc(1/√2)
    ERFC = 0.3173
    window_length_samps = ms_to_samps(window_length_ms, fs)

    # Ensure window length is odd for symmetric windowing
    if not window_length_samps % 2:
        window_length_samps += 1

    half_window = int((window_length_samps - 1) / 2)

    # Pad the time axis so that every sample has a full, centred frame
    padded_rir = np.pad(rir, ((0, 0), (half_window, half_window), (0, 0)), mode='constant')

    # Unit-sum Hann window, shaped to broadcast along the time axis
    window_func = np.hanning(window_length_samps)
    window_func = window_func / np.sum(window_func)
    weights = window_func[None, :, None]

    def weighted_std(signal: NDArray) -> NDArray:
        """Return the weighted standard deviation along axis 1 (axis kept)."""
        if use_local_avg:
            # weights sum to 1, so the weighted sum is the weighted mean
            average = np.sum(signal * weights, axis=1, keepdims=True)
            variance = np.sum(weights * (signal - average) ** 2, axis=1, keepdims=True)
        else:
            variance = np.sum(weights * signal ** 2, axis=1, keepdims=True)
        return np.sqrt(variance)

    n_samples = rir.shape[1]
    output = np.zeros(n_samples)

    # Slide window across the time axis and compute normalized echo density
    for cursor in range(n_samples):
        frame = padded_rir[:, cursor:cursor + window_length_samps, :]
        std = weighted_std(frame)
        # Window-weighted fraction of samples above the weighted std,
        # one value per (axis 0, axis 2) signal
        fraction = np.sum((np.abs(frame) > std) * weights, axis=1)
        # Normalize by ERFC constant; average over signals if there are several
        output[cursor] = (1 / ERFC) * np.mean(fraction)

    return output
484
+
485
def compute_clarity_parameters(rir: Union[Tensor, NDArray], fs: float) -> tuple:
    """
    Compute clarity parameters (C50, C80) from a room impulse response.

    Clarity is the ratio, in dB, of the energy before a time boundary to the
    energy from that boundary onwards. The boundaries are 50 ms and 80 ms.

    Parameters
    ----------
    rir : Union[Tensor, NDArray]
        Room impulse response to analyze. It is split along axis 1 and the
        energy is summed over all elements of each part.
    fs : float
        Sampling rate in Hz.

    Returns
    -------
    tuple
        A tuple containing:
        - c50 : clarity index at the 50 ms boundary in dB
        - c80 : clarity index at the 80 ms boundary in dB
        Both are torch scalars for tensor input and NumPy scalars otherwise.
    """
    # torch and numpy both expose `sum` and `log10`, so pick the module once
    xp = torch if isinstance(rir, torch.Tensor) else np

    def clarity_db(boundary_ms: int):
        """Early-to-late energy ratio in dB for one time boundary."""
        split = int(boundary_ms * fs / 1000)
        early_energy = xp.sum(rir[:, :split] ** 2)
        late_energy = xp.sum(rir[:, split:] ** 2)
        # Small constant keeps the division defined when no late energy exists
        return 10 * xp.log10(early_energy / (late_energy + 1e-32))

    return clarity_db(50), clarity_db(80)
536
+
537
def compute_definition_parameters(
    rir: Union[Tensor, NDArray],
    fs: int,
    interval_ms: float = 50,
) -> Union[float, Tensor]:
    """
    Compute the definition parameter (e.g. D50) from a room impulse response.

    Definition is the ratio of the energy arriving before a time boundary to
    the total energy. A single value is computed per call, for the boundary
    given by ``interval_ms``.

    Parameters
    ----------
    rir : Union[Tensor, NDArray]
        Room impulse response to analyze. The early part is taken along
        axis 1 and the energy is summed over all elements.
    fs : int
        Sampling rate in Hz.
    interval_ms : float, optional
        Time boundary in milliseconds. Default is 50 ms (D50); pass 80 for D80.

    Returns
    -------
    Union[float, Tensor]
        Ratio of early to total energy as a single scalar (not a tuple): a
        torch scalar for tensor input and a NumPy scalar otherwise.
    """
    # Time boundary in samples
    t_samples = int(interval_ms * fs / 1000)

    # torch and numpy both expose `sum`, so pick the module once
    xp = torch if isinstance(rir, torch.Tensor) else np

    early_energy = xp.sum(rir[:, :t_samples] ** 2)
    total_energy = xp.sum(rir ** 2)

    # Small constant keeps the ratio defined for an all-zero response
    return early_energy / (total_energy + 1e-32)
579
+
580
+ # Analysis class for better organization
581
class AcousticAnalyzer:
    """
    Convenience wrapper that runs the module's acoustic analyses on a room
    impulse response and collects the results in one dictionary.

    Attributes
    ----------
    fs : int
        Sampling rate in Hz used for all calculations.
    device : str
        Device ('cpu' or 'cuda') the impulse response is moved to before
        analysis.

    Methods
    -------
    analyze_rir(rir)
        Perform comprehensive analysis of a room impulse response.

    Notes
    -----
    - Both PyTorch tensors and NumPy arrays are accepted; NumPy input is
      converted to a tensor.
    """

    def __init__(self, fs: int = 48000, device: str = 'cpu'):
        """
        Store the analysis settings.

        Parameters
        ----------
        fs : int
            Sampling rate in Hz
        device : str
            Device to use for PyTorch computations ('cpu' or 'cuda')
        """
        self.fs = fs
        self.device = device

    def analyze_rir(self, rir: Union[Tensor, NDArray]) -> dict:
        """
        Perform comprehensive analysis of a room impulse response.

        Parameters
        ----------
        rir : Union[Tensor, NDArray]
            Room impulse response to analyze. Can be 1D, 2D, or 3D.
            1D input gains a leading and a trailing axis, 2D input gains a
            trailing axis, giving the 3D layout (batch, time, channels).

        Returns
        -------
        dict
            Dictionary containing all computed acoustic parameters:
            - 'edc': Energy Decay Curve in dB
            - 'edr': Energy Decay Relief in dB (time-frequency representation)
            - 'ned': Normalized Echo Density profile
            - 'c50': Clarity index at 50ms boundary in dB
            - 'c80': Clarity index at 80ms boundary in dB
            - 'd50': Definition parameter at 50ms boundary (ratio)
            - 'rt60': Reverberation time in seconds
        """
        # Bring the input to the 3D layout (batch, time, channels)
        if rir.ndim == 1:
            rir = rir[None, :, None]
        elif rir.ndim == 2:
            rir = rir[:, :, None]

        # Work on a tensor living on the configured device
        as_tensor = torch.from_numpy(rir) if isinstance(rir, NDArray) else rir
        rir_tensor = as_tensor.to(self.device)

        # Energy decay curve and energy decay relief
        edc = compute_edc(rir_tensor, fs=self.fs)
        edr = compute_edr(rir_tensor)

        # Normalized echo density
        ned = normalized_echo_density(rir_tensor, self.fs)

        # Clarity index at 50ms and 80ms, then definition
        c50, c80 = compute_clarity_parameters(rir_tensor, self.fs)
        d50 = compute_definition_parameters(rir_tensor, self.fs)

        # RT60 from the EDC, using a time axis with one entry per EDC sample
        time_vector = torch.arange(edc.shape[1], dtype=edc.dtype, device=self.device) / self.fs
        rt60, *_ = estimate_rt60(edc, time_vector)

        return {
            'edc': edc,
            'edr': edr,
            'ned': ned,
            'c50': c50,
            'c80': c80,
            'd50': d50,
            'rt60': rt60,
        }