flareverb 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flareverb/__init__.py +1 -0
- flareverb/analysis.py +678 -0
- flareverb/config/config.py +338 -0
- flareverb/generate.py +470 -0
- flareverb/reverb.py +816 -0
- flareverb/sampling.py +187 -0
- flareverb/utils.py +443 -0
- flareverb-0.0.1.dist-info/METADATA +42 -0
- flareverb-0.0.1.dist-info/RECORD +11 -0
- flareverb-0.0.1.dist-info/WHEEL +4 -0
- flareverb-0.0.1.dist-info/licenses/LICENSE +21 -0
flareverb/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .config import *
|
flareverb/analysis.py
ADDED
|
@@ -0,0 +1,678 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
import numpy as np
|
|
3
|
+
from typing import Union, Optional, Tuple
|
|
4
|
+
|
|
5
|
+
import torch
|
|
6
|
+
import torch.nn.functional as F
|
|
7
|
+
from torch.fft import rfft, irfft
|
|
8
|
+
from scipy.signal import spectrogram
|
|
9
|
+
from scipy.stats import linregress
|
|
10
|
+
|
|
11
|
+
from flareverb.utils import (
|
|
12
|
+
ms_to_samps,
|
|
13
|
+
filterbank,
|
|
14
|
+
discard_last_n_percent)
|
|
15
|
+
|
|
16
|
+
Tensor = torch.Tensor
|
|
17
|
+
NDArray = np.ndarray
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def schroeder_backward_int(
    x: Union[Tensor, NDArray],
    energy_norm: bool = True,
    subtract_noise: bool = False,
    noise_level: float = 0.0,
) -> Tuple[Union[Tensor, NDArray], Union[Tensor, NDArray]]:
    """
    Run Schroeder backward integration on a squared impulse response.

    This is a thin dispatcher: PyTorch tensors are handled by
    ``_schroeder_backward_int_torch`` and every other input is forwarded to
    ``_schroeder_backward_int_numpy``.

    Parameters
    ----------
    x : Union[Tensor, NDArray]
        Input signal (impulse response). Both backends integrate along axis 1.
    energy_norm : bool, optional
        If True, normalize the output to its maximum value (default: True).
    subtract_noise : bool, optional
        If True, subtract the squared noise level from the squared signal
        (default: False).
    noise_level : float, optional
        The noise level to subtract if subtract_noise is True (default: 0.0).

    Returns
    -------
    tuple of Union[Tensor, NDArray]
        The backward-integrated (and optionally normalized) signal, followed
        by the normalization value(s) that were applied.
    """
    # Select the backend once, then forward the arguments untouched.
    backend = (
        _schroeder_backward_int_torch
        if isinstance(x, torch.Tensor)
        else _schroeder_backward_int_numpy
    )
    return backend(x, energy_norm, subtract_noise, noise_level)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _schroeder_backward_int_torch(
|
|
52
|
+
x: Tensor,
|
|
53
|
+
energy_norm: bool,
|
|
54
|
+
subtract_noise: bool,
|
|
55
|
+
noise_level: float,
|
|
56
|
+
) -> Tuple[Tensor, Tensor]:
|
|
57
|
+
"""
|
|
58
|
+
PyTorch implementation of Schroeder backward integration.
|
|
59
|
+
|
|
60
|
+
This function computes the Schroeder backward integration for PyTorch tensors.
|
|
61
|
+
The integration is performed by flipping the signal, computing the cumulative
|
|
62
|
+
sum of squared values, and then flipping back.
|
|
63
|
+
|
|
64
|
+
Parameters
|
|
65
|
+
----------
|
|
66
|
+
x : Tensor
|
|
67
|
+
Input signal tensor to be integrated.
|
|
68
|
+
energy_norm : bool
|
|
69
|
+
If True, normalize the output to its maximum value.
|
|
70
|
+
subtract_noise : bool
|
|
71
|
+
If True, subtract the squared noise level from the squared signal.
|
|
72
|
+
noise_level : float
|
|
73
|
+
The noise level to subtract if subtract_noise is True.
|
|
74
|
+
|
|
75
|
+
Returns
|
|
76
|
+
-------
|
|
77
|
+
Tuple[Tensor, Tensor]
|
|
78
|
+
A tuple containing:
|
|
79
|
+
- out: The backward integrated and normalized signal
|
|
80
|
+
- norm_vals: The normalization values used (maximum values per channel)
|
|
81
|
+
|
|
82
|
+
Notes
|
|
83
|
+
-----
|
|
84
|
+
- If subtract_noise is True, noise_level^2 is subtracted from the squared signal
|
|
85
|
+
- Normalization is useful for t60 estimation from the EDC
|
|
86
|
+
"""
|
|
87
|
+
out = torch.flip(x, dims=[1])
|
|
88
|
+
if subtract_noise:
|
|
89
|
+
out_sqrd = out ** 2 - noise_level ** 2
|
|
90
|
+
else:
|
|
91
|
+
out_sqrd = out ** 2
|
|
92
|
+
out = torch.cumsum(out_sqrd, dim=1)
|
|
93
|
+
out = torch.flip(out, dims=[1])
|
|
94
|
+
|
|
95
|
+
# Normalize to 1
|
|
96
|
+
if energy_norm:
|
|
97
|
+
norm_vals = torch.max(out, dim=1, keepdim=True)[0] # per channel
|
|
98
|
+
else:
|
|
99
|
+
norm_vals = torch.ones_like(out)
|
|
100
|
+
|
|
101
|
+
return out / norm_vals, norm_vals
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _schroeder_backward_int_numpy(
|
|
105
|
+
x: NDArray,
|
|
106
|
+
energy_norm: bool,
|
|
107
|
+
subtract_noise: bool,
|
|
108
|
+
noise_level: float,
|
|
109
|
+
) -> Tuple[NDArray, NDArray]:
|
|
110
|
+
"""
|
|
111
|
+
NumPy implementation of Schroeder backward integration.
|
|
112
|
+
|
|
113
|
+
This function computes the Schroeder backward integration for NumPy arrays.
|
|
114
|
+
The integration is performed by flipping the signal, computing the cumulative
|
|
115
|
+
sum of squared values, and then flipping back.
|
|
116
|
+
|
|
117
|
+
Parameters
|
|
118
|
+
----------
|
|
119
|
+
x : NDArray
|
|
120
|
+
Input signal array to be integrated.
|
|
121
|
+
energy_norm : bool
|
|
122
|
+
If True, normalize the output to its maximum value.
|
|
123
|
+
subtract_noise : bool
|
|
124
|
+
If True, subtract the squared noise level from the squared signal.
|
|
125
|
+
noise_level : float
|
|
126
|
+
The noise level to subtract if subtract_noise is True.
|
|
127
|
+
|
|
128
|
+
Returns
|
|
129
|
+
-------
|
|
130
|
+
Tuple[NDArray, NDArray]
|
|
131
|
+
A tuple containing:
|
|
132
|
+
- out: The backward integrated and normalized signal
|
|
133
|
+
- norm_vals: The normalization values used (maximum values per channel)
|
|
134
|
+
|
|
135
|
+
Notes
|
|
136
|
+
-----
|
|
137
|
+
- If subtract_noise is True, noise_level^2 is subtracted from the squared signal
|
|
138
|
+
- Normalization is useful for t60 estimation from the EDC
|
|
139
|
+
"""
|
|
140
|
+
out = np.flip(x, axis=1)
|
|
141
|
+
if subtract_noise:
|
|
142
|
+
out_sqrd = out ** 2 - noise_level ** 2
|
|
143
|
+
else:
|
|
144
|
+
out_sqrd = out ** 2
|
|
145
|
+
out = np.cumsum(out_sqrd, axis=1)
|
|
146
|
+
out = np.flip(out, axis=1)
|
|
147
|
+
|
|
148
|
+
# Normalize to 1
|
|
149
|
+
if energy_norm:
|
|
150
|
+
norm_vals = np.max(out, keepdims=True, axis=1) # per channel
|
|
151
|
+
else:
|
|
152
|
+
norm_vals = np.ones_like(out)
|
|
153
|
+
|
|
154
|
+
return out / norm_vals, norm_vals
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def compute_edc(
    x: Union[Tensor, NDArray],
    use_filterbank: bool = False,
    compensate_fbnk_energy: bool = True,
    n_fractions: int = 1,
    f_min: int = 63,
    f_max: int = 16000,
    fs: int = 48000,
    energy_norm: bool = True,
    subtract_noise: bool = False,
    noise_level: float = 0.0,
) -> Union[Tensor, NDArray]:
    """
    Compute the Energy Decay Curve (EDC) in dB from an input signal.

    The pipeline is: trim the tail of the signal, optionally split it into
    frequency bands with ``filterbank``, apply Schroeder backward
    integration, and convert the result to decibels.

    Parameters
    ----------
    x : Union[Tensor, NDArray]
        Input signal (room impulse response) to analyze.
    use_filterbank : bool, optional
        If True, pass the signal through ``filterbank`` before integrating
        so that one EDC per band is produced. Default is False.
    compensate_fbnk_energy : bool, optional
        Forwarded to ``filterbank`` as ``compensate_energy``.
        Only used when use_filterbank is True. Default is True.
    n_fractions : int, optional
        Number of fractions per octave for the filterbank.
        Only used when use_filterbank is True. Default is 1.
    f_min : int, optional
        Minimum filterbank frequency in Hz.
        Only used when use_filterbank is True. Default is 63.
    f_max : int, optional
        Maximum filterbank frequency in Hz.
        Only used when use_filterbank is True. Default is 16000.
    fs : int, optional
        Sampling rate in Hz, forwarded to ``filterbank``. Default is 48000.
    energy_norm : bool, optional
        If True, normalize the integrated energy to its maximum.
        Default is True.
    subtract_noise : bool, optional
        If True, subtract the squared noise level from the squared signal.
        Default is False.
    noise_level : float, optional
        Noise level used when subtract_noise is True. Default is 0.0.

    Returns
    -------
    Union[Tensor, NDArray]
        The energy decay curve in dB, of the same array family (tensor or
        ndarray) that the integration step returns.

    Notes
    -----
    - The tail is removed with ``discard_last_n_percent(x, 0.5)`` to avoid
      filtering artefacts. NOTE(review): the original comments call this
      "0.5 permille" while the helper name says percent; confirm the unit.
    - A constant 1e-32 is added before the logarithm so that zero energy
      does not produce -inf.
    - The result is converted to dB using 10 * log10()
    """
    # Remove filtering artefacts at the end of the response.
    trimmed = discard_last_n_percent(x, 0.5)

    if use_filterbank:
        # Split into bands; the second return value is not needed here.
        trimmed, _ = filterbank(
            trimmed,
            n_fractions=n_fractions,
            f_min=f_min,
            f_max=f_max,
            sample_rate=fs,
            compensate_energy=compensate_fbnk_energy,
        )

    # Backward-integrated energy (normalization values are discarded).
    decay, _ = schroeder_backward_int(trimmed, energy_norm, subtract_noise, noise_level)

    # Energy in dB, using the log10 of the matching array library.
    log10 = torch.log10 if isinstance(decay, torch.Tensor) else np.log10
    return 10 * log10(decay + 1e-32)
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def compute_edr(
    x: Union[Tensor, NDArray],
    energy_norm: bool = True,
    subtract_noise: bool = False,
    noise_level: float = 0.0,
) -> Union[Tensor, NDArray]:
    """
    Compute the Energy Decay Relief (EDR) in dB from an input signal using STFT.

    The EDR is obtained by applying Schroeder backward integration along the
    time-frame axis of the STFT magnitude, independently for every
    frequency bin.

    Parameters
    ----------
    x : Union[Tensor, NDArray]
        Input signal (room impulse response) to analyze.
    energy_norm : bool, optional
        If True, normalize each bin's decay to its maximum. Default is True.
    subtract_noise : bool, optional
        If True, subtract the squared noise level from the squared
        magnitudes. Default is False.
    noise_level : float, optional
        Noise level used when subtract_noise is True. Default is 0.0.

    Returns
    -------
    Union[Tensor, NDArray]
        The energy decay relief in dB. A torch.Tensor is always returned,
        including for NumPy input. Time frames are on the last axis; for
        tensor input the layout is whatever ``_stft_torch`` produces
        (documented there as (frequency_bins, time_frames)).

    Notes
    -----
    - The tail is removed with ``discard_last_n_percent(x, 0.5)`` to avoid
      filtering artefacts.
    - NumPy input is analyzed with ``scipy.signal.spectrogram`` along axis 1.
      SciPy places frequency on that axis and segment times on the last
      axis, so the integration must run over the last axis, not axis 1.
    - A constant 1e-32 is added before the logarithm (as in
      ``compute_edc``) so that zero-energy cells do not produce -inf.
    - The result is converted to dB using 10 * log10()
    """
    # Remove filtering artefacts at the end of the response.
    out = discard_last_n_percent(x, 0.5)

    if isinstance(out, torch.Tensor):
        # PyTorch STFT implementation
        stft_mag = _stft_torch(out)
    else:
        # NumPy STFT using scipy
        _, _, stft_mag = spectrogram(
            out, nperseg=1028, noverlap=int(1028 * 0.75), mode='magnitude', axis=1
        )
        stft_mag = torch.tensor(stft_mag)

    # Both STFT paths put time frames on the LAST axis, while the Schroeder
    # helper integrates along axis 1. Move time to axis 1, integrate, and
    # restore the layout (a no-op for a 2-D (freq, frames) input).
    stft_mag = torch.movedim(stft_mag, -1, 1)
    out, _ = schroeder_backward_int(stft_mag, energy_norm, subtract_noise, noise_level)
    out = torch.movedim(out, 1, -1)

    return 10 * torch.log10(out + 1e-32)
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
def _stft_torch(x: Tensor, nperseg: int = 1028, noverlap: int = None) -> Tensor:
|
|
294
|
+
"""
|
|
295
|
+
PyTorch implementation of STFT magnitude computation.
|
|
296
|
+
|
|
297
|
+
This function computes the Short-time Fourier transform magnitude using PyTorch.
|
|
298
|
+
It provides a time-frequency representation of the input signal using overlapping
|
|
299
|
+
windows and FFT computation.
|
|
300
|
+
|
|
301
|
+
Parameters
|
|
302
|
+
----------
|
|
303
|
+
x : Tensor
|
|
304
|
+
Input signal tensor to be analyzed.
|
|
305
|
+
nperseg : int, optional
|
|
306
|
+
Length of each segment (window length) in samples. Default is 1028.
|
|
307
|
+
noverlap : int, optional
|
|
308
|
+
Number of points to overlap between segments. If None, defaults to
|
|
309
|
+
75% of nperseg. Default is None.
|
|
310
|
+
|
|
311
|
+
Returns
|
|
312
|
+
-------
|
|
313
|
+
Tensor
|
|
314
|
+
STFT magnitude tensor with shape (frequency_bins, time_frames).
|
|
315
|
+
"""
|
|
316
|
+
if noverlap is None:
|
|
317
|
+
noverlap = int(nperseg * 0.75)
|
|
318
|
+
|
|
319
|
+
hop_length = nperseg - noverlap
|
|
320
|
+
|
|
321
|
+
# Pad the signal
|
|
322
|
+
pad_length = nperseg // 2
|
|
323
|
+
x_padded = F.pad(x, (0, 0, pad_length, pad_length, 0, 0))
|
|
324
|
+
|
|
325
|
+
# Create windows
|
|
326
|
+
window = torch.hann_window(nperseg, dtype=x.dtype, device=x.device)
|
|
327
|
+
|
|
328
|
+
# Compute STFT
|
|
329
|
+
stft = torch.stft(x_padded.squeeze(), n_fft=nperseg, hop_length=hop_length,
|
|
330
|
+
window=window, return_complex=True, center=False)
|
|
331
|
+
|
|
332
|
+
return torch.abs(stft)
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
def estimate_rt60(
    edc_db: Union[Tensor, NDArray],
    time: Union[Tensor, NDArray],
    decay_start_db: float = -5,
    decay_end_db: float = -35
) -> Tuple[float, float, float, Union[Tensor, NDArray]]:
    """
    Estimate the reverberation time (RT60) from an Energy Decay Curve (EDC) using linear regression.

    A straight line is fitted to the samples of the EDC that lie strictly
    between ``decay_start_db`` and ``decay_end_db``; RT60 is the time that
    line needs to fall by 60 dB.

    Parameters
    ----------
    edc_db : Union[Tensor, NDArray]
        Energy decay curve in dB.
    time : Union[Tensor, NDArray]
        Time vector in seconds matching the EDC samples. It is indexed with
        the squeezed validity mask, so it must have the shape of the
        squeezed EDC.
    decay_start_db : float, optional
        Upper bound (exclusive) of the fit range in dB. Default is -5 dB.
    decay_end_db : float, optional
        Lower bound (exclusive) of the fit range in dB. Default is -35 dB.

    Returns
    -------
    Tuple[float, float, float, Union[Tensor, NDArray]]
        A tuple containing:
        - rt60 : float
            Estimated RT60 in seconds, or infinity when no fit is possible
            or the fitted slope is zero.
        - slope : float
            Slope of the linear fit in dB/s (0.0 when no fit is possible).
        - intercept : float
            Y-intercept of the linear fit (0.0 when no fit is possible).
        - valid_range : Union[Tensor, NDArray]
            Boolean mask of the samples selected for the fit.

    Notes
    -----
    - RT60 is calculated as -60 / slope.
    - Fewer than two samples inside the range cannot define a line, so that
      case is reported as "no valid range" instead of letting the
      regression raise.
    - Typical values for T60 from a 30dB range are -5 dB to -35 dB, but may
      need adjustment for different signals.
    """
    valid_range = (edc_db < decay_start_db) & (edc_db > decay_end_db)

    # `.sum()` exists on both ndarray and Tensor masks; `torch.any` would
    # reject NumPy input with a TypeError.
    if int(valid_range.sum()) < 2:
        return float('inf'), 0.0, 0.0, valid_range

    mask = valid_range.squeeze()
    if isinstance(edc_db, torch.Tensor):
        # linregress works on NumPy data; detach so tensors that track
        # gradients can be converted as well.
        time_valid = time[mask].detach().cpu().numpy()
        edc_valid = edc_db[valid_range].detach().cpu().numpy()
    else:
        time_valid = time[mask]
        edc_valid = edc_db[valid_range]

    slope, intercept, *_ = linregress(time_valid, edc_valid)
    rt60 = -60 / slope if slope != 0 else float('inf')

    return rt60, slope, intercept, valid_range
|
|
401
|
+
|
|
402
|
+
|
|
403
|
+
def normalized_echo_density(
    rir: Union[Tensor, NDArray],
    fs: float,
    window_length_ms: float = 30,
    use_local_avg: bool = True
) -> Union[Tensor, NDArray]:
    """
    Compute the normalized echo density profile as defined by Abel.

    For every sample of the response a Hann-weighted window centred on that
    sample is analyzed: the weighted standard deviation of the window is
    computed, and the window weights of all samples whose magnitude exceeds
    that standard deviation are summed. Dividing by erfc(1/sqrt(2)) makes
    the expected value 1 for Gaussian noise.

    Parameters
    ----------
    rir : Union[Tensor, NDArray]
        Room impulse response, laid out as (batch, time, channels). 1-D
        input is treated as a single response and 2-D input as
        (batch, time). Tensors are converted to NumPy for processing.
    fs : float
        Sampling rate in Hz.
    window_length_ms : float, optional
        Length of the analysis window in milliseconds. Default is 30 ms.
    use_local_avg : bool, optional
        If True, the weighted standard deviation is taken around the
        weighted mean of the window; otherwise around zero.
        Default is True.

    Returns
    -------
    Union[Tensor, NDArray]
        NumPy array with the normalized echo density. For a single response
        (batch == 1 and channels == 1) a 1-D array with one value per input
        sample is returned; otherwise the array has shape
        (batch, time, channels).

    Notes
    -----
    - The response is zero-padded by half a window on both sides so that
      every sample has a full, centred window.
    - The window length is forced to be odd so the window is symmetric
      around the current sample.
    - The threshold comparison uses ``|frame|`` (not the mean-removed
      frame), also when ``use_local_avg`` is True.
    """
    if isinstance(rir, torch.Tensor):
        rir = rir.detach().cpu().numpy()  # Convert to NumPy for processing
    rir = np.asarray(rir)

    # Bring the input to (batch, time, channels).
    if rir.ndim == 1:
        rir = rir[None, :, None]
    elif rir.ndim == 2:
        rir = rir[:, :, None]

    # erfc(1/√2)
    ERFC = 0.3173
    window_length_samps = int(ms_to_samps(window_length_ms, fs))

    # Ensure window length is odd for symmetric windowing
    if window_length_samps % 2 == 0:
        window_length_samps += 1
    half_window = (window_length_samps - 1) // 2

    # Pad the time axis so edge samples also get a full window.
    padded_rir = np.pad(
        rir, ((0, 0), (half_window, half_window), (0, 0)), mode='constant'
    )

    # Unit-sum Hann weights, shaped to broadcast along the TIME axis
    # (a bare 1-D window would align with the channel axis instead).
    window_func = np.hanning(window_length_samps)
    window_func = (window_func / np.sum(window_func))[None, :, None]

    n_batch, n_samples, n_channels = rir.shape
    output = np.zeros((n_batch, n_samples, n_channels))

    # Slide the window over every time sample (not over the batch axis).
    for cursor in range(n_samples):
        frame = padded_rir[:, cursor:cursor + window_length_samps, :]

        if use_local_avg:
            average = np.sum(frame * window_func, axis=1, keepdims=True)
            variance = np.sum(window_func * (frame - average) ** 2, axis=1, keepdims=True)
        else:
            variance = np.sum(window_func * frame ** 2, axis=1, keepdims=True)
        std = np.sqrt(variance)

        # Window-weighted share of samples above the weighted std,
        # normalized by the ERFC constant.
        weighted_count = np.sum((np.abs(frame) > std) * window_func, axis=1)
        output[:, cursor, :] = weighted_count / ERFC

    # Keep the historical 1-D return for a single response.
    if n_batch == 1 and n_channels == 1:
        return output[0, :, 0]
    return output
|
|
484
|
+
|
|
485
|
+
def compute_clarity_parameters(rir: Union[Tensor, NDArray], fs: float) -> tuple:
    """
    Compute clarity parameters (C50, C80) from a room impulse response.

    Clarity is the ratio, in dB, of the energy arriving before a time
    boundary to the energy arriving after it. C50 uses a 50 ms boundary and
    C80 an 80 ms boundary.

    Parameters
    ----------
    rir : Union[Tensor, NDArray]
        Room impulse response with time on axis 1. Energies are summed over
        all elements, so every batch entry and channel contributes to a
        single value.
    fs : float
        Sampling rate in Hz.

    Returns
    -------
    tuple
        A tuple containing:
        - c50 : clarity index at the 50 ms boundary in dB
        - c80 : clarity index at the 80 ms boundary in dB
        Both are 0-dim tensors for tensor input and NumPy scalars otherwise.

    Notes
    -----
    - 1e-32 is added to the late energy to avoid division by zero.
    """
    # Use the reduction/log of whichever array library the input belongs to.
    is_tensor = isinstance(rir, torch.Tensor)
    total = torch.sum if is_tensor else np.sum
    log10 = torch.log10 if is_tensor else np.log10

    def clarity_db(boundary_ms):
        """Return the early-to-late energy ratio in dB for one boundary."""
        split = int(boundary_ms * fs / 1000)  # boundary in samples
        early_energy = total(rir[:, :split] ** 2)
        late_energy = total(rir[:, split:] ** 2)
        return 10 * log10(early_energy / (late_energy + 1e-32))

    c50 = clarity_db(50)
    c80 = clarity_db(80)
    return c50, c80
|
|
536
|
+
|
|
537
|
+
def compute_definition_parameters(
    rir: Union[Tensor, NDArray],
    fs: int,
    interval_ms: float = 50,
) -> Union[Tensor, float]:
    """
    Compute the definition parameter (e.g. D50) from a room impulse response.

    Definition is the ratio of the energy arriving before a time boundary
    to the total energy of the response. One value is computed per call;
    the boundary is chosen with ``interval_ms`` (50 ms gives D50, 80 ms
    gives D80).

    Parameters
    ----------
    rir : Union[Tensor, NDArray]
        Room impulse response with time on axis 1. Energies are summed over
        all elements, so every batch entry and channel contributes to a
        single value.
    fs : int
        Sampling rate in Hz.
    interval_ms : float, optional
        Time boundary in milliseconds. Default is 50 ms (D50).

    Returns
    -------
    Union[Tensor, float]
        The definition ratio (early energy / total energy) as a single
        value, not a tuple: a 0-dim tensor for tensor input, a NumPy scalar
        otherwise.

    Notes
    -----
    - 1e-32 is added to the total energy to avoid division by zero.
    """
    # Time boundary in samples
    t_samples = int(interval_ms * fs / 1000)

    # Use the reduction of whichever array library the input belongs to.
    total = torch.sum if isinstance(rir, torch.Tensor) else np.sum
    early_energy = total(rir[:, :t_samples] ** 2)
    total_energy = total(rir ** 2)

    return early_energy / (total_energy + 1e-32)
|
|
579
|
+
|
|
580
|
+
# Analysis class for better organization
|
|
581
|
+
class AcousticAnalyzer:
    """
    Convenience wrapper that runs the module's acoustic analysis functions
    on a room impulse response and collects the results.

    Attributes
    ----------
    fs : int
        Sampling rate in Hz used for all calculations.
    device : str
        Device ('cpu' or 'cuda') the response is moved to before analysis.

    Methods
    -------
    analyze_rir(rir)
        Perform comprehensive analysis of a room impulse response.

    Notes
    -----
    - Both PyTorch tensors and NumPy arrays are accepted; NumPy input is
      converted to a tensor before the analysis functions are called.
    """

    def __init__(self, fs: int = 48000, device: str = 'cpu'):
        """
        Store the analysis settings.

        Parameters
        ----------
        fs : int
            Sampling rate in Hz
        device : str
            Device to use for PyTorch computations ('cpu' or 'cuda')
        """
        self.fs = fs
        self.device = device

    def analyze_rir(self, rir: Union[Tensor, NDArray]) -> dict:
        """
        Perform comprehensive analysis of a room impulse response.

        Parameters
        ----------
        rir : Union[Tensor, NDArray]
            Room impulse response to analyze. Can be 1D, 2D, or 3D.
            1D input becomes (1, time, 1) and 2D input gets a trailing
            channel axis, giving a 3D (batch, time, channels) layout.

        Returns
        -------
        dict
            Dictionary with the computed acoustic parameters:
            - 'edc': Energy Decay Curve in dB
            - 'edr': Energy Decay Relief in dB (time-frequency representation)
            - 'ned': Normalized Echo Density profile
            - 'c50': Clarity index at 50ms boundary in dB
            - 'c80': Clarity index at 80ms boundary in dB
            - 'd50': Definition parameter at 50ms boundary (ratio)
            - 'rt60': Reverberation time in seconds
        """
        # Ensure 3D shape (batch, time, channels)
        if rir.ndim == 1:
            rir = rir[None, :, None]
        elif rir.ndim == 2:
            rir = rir[:, :, None]

        # Every analysis below runs on a tensor placed on the chosen device.
        source = torch.from_numpy(rir) if isinstance(rir, NDArray) else rir
        rir_tensor = source.to(self.device)

        results = {}
        results['edc'] = compute_edc(rir_tensor, fs=self.fs)
        results['edr'] = compute_edr(rir_tensor)
        results['ned'] = normalized_echo_density(rir_tensor, self.fs)

        # Clarity index at 50 ms and 80 ms
        c50, c80 = compute_clarity_parameters(rir_tensor, self.fs)
        results['c50'] = c50
        results['c80'] = c80

        # Definition at the default 50 ms boundary
        results['d50'] = compute_definition_parameters(rir_tensor, self.fs)

        # RT60 from the EDC; the time axis is built from the EDC's length
        # along dim 1 and the sampling rate.
        edc = results['edc']
        time_vector = torch.arange(edc.shape[1], dtype=edc.dtype, device=self.device) / self.fs
        rt60, *_ = estimate_rt60(edc, time_vector)
        results['rt60'] = rt60

        return results
|