python-peass 2.0.1__py3-none-any.whl → 2.0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
peass/__init__.py CHANGED
@@ -1,18 +1,30 @@
1
1
  """
2
2
  python-peass: Perceptual Evaluation methods for Audio Source Separation
3
- A modern, Pythonic port of the PEASS v2.0.1 toolkit [1].
3
+ A modern, Pythonic port of the PEASS v2.0.1 toolkit.
4
4
  """
5
5
 
6
- __version__ = "2.0.1"
6
+ __version__ = "2.0.1.2" # upstream PEASS version (2.0.1) plus one extra segment for this port's own revisions
7
7
 
8
- from .decomposition import extract_distortion_components
9
- from .metrics import audio_quality_features
10
- from .metrics import calculate_energy_ratios
11
- from .predictor import predict_peass_scores
8
+ from .config import DecomposedFilePaths
9
+ from .config import DecomposedWaveforms
10
+ from .config import DecompositionConfiguration
11
+ from .config import DecompositionResult
12
+ from .config import ModulationProcessingType
13
+ from .config import PerceptualSeparationScores
14
+ from .decomposition import decompose_distortion_components
15
+ from .metrics import calculate_auditory_quality_features
16
+ from .metrics import calculate_bss_eval_energy_ratios
17
+ from .predictor import predict_perceptual_evaluation_scores
12
18
 
13
19
  __all__ = [
14
- "predict_peass_scores",
15
- "extract_distortion_components",
16
- "calculate_energy_ratios",
17
- "audio_quality_features",
20
+ "DecomposedFilePaths",
21
+ "DecomposedWaveforms",
22
+ "DecompositionConfiguration",
23
+ "DecompositionResult",
24
+ "ModulationProcessingType",
25
+ "PerceptualSeparationScores",
26
+ "predict_perceptual_evaluation_scores",
27
+ "decompose_distortion_components",
28
+ "calculate_bss_eval_energy_ratios",
29
+ "calculate_auditory_quality_features",
18
30
  ]
peass/auditory_model.py CHANGED
@@ -1,158 +1,364 @@
1
1
  """
2
- PEASS Auditory Package - Dau 1996/1997 Psychoacoustic Ear Model [1, 2]
2
+ PEASS Auditory Package - Dau 1996/1997 Psychoacoustic Ear Model
3
3
 
4
- This module ports the legacy C/MEX elements (haircell.c, adapt.c) into pure,
5
- performant Python [1, 3]. It simulates the transduction process of the inner hair cells
6
- and the temporal adaptation (forward masking) of the auditory nerve.
4
+ Simulates the transduction process of the inner hair cells and the temporal
5
+ adaptation (forward masking) of the auditory nerve. Uses Numba if available,
6
+ and falls back gracefully to a vectorized SciPy/NumPy implementation.
7
7
  """
8
8
 
9
+ import math
9
10
  from typing import Tuple
10
11
 
11
12
  import numpy as np
12
13
  import scipy.signal as signal
13
14
 
15
+ from .config import ModulationProcessingType
14
16
  from .gammatone import GammatoneAnalyzer
17
+ from .gammatone import fast_resample_poly
15
18
 
16
- # Check for Numba availability
19
+ # -----------------------------------------------------------------------------
20
+ # NUMBA JIT COMPILATION (WITH SAFE IMPORT FALLBACK)
21
+ # -----------------------------------------------------------------------------
17
22
  try:
18
23
  import numba
19
24
 
20
25
  _HAS_NUMBA = True
26
+
27
+
28
+ @numba.njit(cache=True)
29
+ def _numba_fused_auditory_kernel(
30
+ subband_signals: np.ndarray,
31
+ sampling_frequency_hz: float,
32
+ haircell_filter_gain: float,
33
+ adaptation_bandwidths: np.ndarray,
34
+ absolute_hearing_threshold: float
35
+ ) -> np.ndarray:
36
+ """
37
+ Fused JIT kernel: Half-wave rectification, haircell lowpass,
38
+ and 5-stage non-linear adaptation executing natively in a single pass.
39
+ """
40
+ num_bands, num_samples = subband_signals.shape
41
+ output_signals = np.empty_like(subband_signals)
42
+
43
+ stage_thresholds = np.empty(5, dtype=np.float64)
44
+ stage_gains = np.empty(5, dtype=np.float64)
45
+
46
+ current_threshold = absolute_hearing_threshold
47
+ for stage_idx in range(5):
48
+ current_threshold = math.sqrt(current_threshold)
49
+ stage_thresholds[stage_idx] = current_threshold
50
+ stage_gains[stage_idx] = math.exp(-math.pi * adaptation_bandwidths[stage_idx] / sampling_frequency_hz)
51
+
52
+ haircell_factor = 1.0 - haircell_filter_gain
53
+
54
+ adaptation_factors = np.empty_like(stage_thresholds)
55
+ for band_idx in range(num_bands):
56
+ last_haircell_state = 0.0
57
+ for stage_idx in range(5):
58
+ adaptation_factors[stage_idx] = stage_thresholds[stage_idx]
59
+ for sample_idx in range(num_samples):
60
+ # 1. Half-wave rectification
61
+ current_value = subband_signals[band_idx, sample_idx]
62
+ if current_value < 0.0:
63
+ current_value = 0.0
64
+
65
+ # 2. 1 kHz first-order lowpass filter (haircell transduction)
66
+ current_value = haircell_filter_gain * last_haircell_state + haircell_factor * current_value
67
+ last_haircell_state = current_value
68
+
69
+ # Minimum hearing threshold floor
70
+ if current_value < absolute_hearing_threshold:
71
+ current_value = absolute_hearing_threshold
72
+
73
+ # 3. Five cascaded non-linear adaptation stages (each divides by its running factor)
74
+ for stage_idx in range(5):
75
+ gain_value = stage_gains[stage_idx]
76
+ threshold_value = stage_thresholds[stage_idx]
77
+ active_factor = adaptation_factors[stage_idx]
78
+
79
+ compressed_value = current_value / active_factor
80
+
81
+ adaptation_factors[stage_idx] = max(
82
+ (1.0 - gain_value) * compressed_value + gain_value * active_factor,
83
+ threshold_value
84
+ )
85
+ current_value = compressed_value
86
+
87
+ output_signals[band_idx, sample_idx] = current_value
88
+
89
+ return output_signals
90
+
91
+
92
+ @numba.njit(cache=True)
93
+ def _numba_haircell_transduction_kernel(
94
+ subband_signals: np.ndarray,
95
+ sampling_frequency_hz: float
96
+ ) -> np.ndarray:
97
+ """
98
+ Dedicated JIT-compiled kernel for standalone haircell transduction.
99
+ """
100
+ num_bands, num_samples = subband_signals.shape
101
+ output_signals = np.empty_like(subband_signals)
102
+ haircell_filter_gain = math.exp(-math.pi * 2000.0 / sampling_frequency_hz)
103
+ haircell_factor = 1.0 - haircell_filter_gain
104
+
105
+ for band_idx in range(num_bands):
106
+ last_haircell_state = 0.0
107
+ for sample_idx in range(num_samples):
108
+ current_value = subband_signals[band_idx, sample_idx]
109
+ if current_value < 0.0:
110
+ current_value = 0.0
111
+ current_value = haircell_filter_gain * last_haircell_state + haircell_factor * current_value
112
+ last_haircell_state = current_value
113
+ output_signals[band_idx, sample_idx] = current_value
114
+ return output_signals
115
+
116
+
117
+ @numba.njit(cache=True)
118
+ def _numba_adaptation_loops_kernel(
119
+ subband_signals: np.ndarray,
120
+ sampling_frequency_hz: float,
121
+ adaptation_bandwidths: np.ndarray,
122
+ absolute_hearing_threshold: float
123
+ ) -> np.ndarray:
124
+ """
125
+ Dedicated JIT-compiled kernel for standalone adaptation loops.
126
+ """
127
+ num_bands, num_samples = subband_signals.shape
128
+ output_signals = np.empty_like(subband_signals)
129
+
130
+ stage_thresholds = np.empty(5, dtype=np.float64)
131
+ stage_gains = np.empty(5, dtype=np.float64)
132
+
133
+ current_threshold = absolute_hearing_threshold
134
+ for stage_idx in range(5):
135
+ current_threshold = math.sqrt(current_threshold)
136
+ stage_thresholds[stage_idx] = current_threshold
137
+ stage_gains[stage_idx] = math.exp(-math.pi * adaptation_bandwidths[stage_idx] / sampling_frequency_hz)
138
+
139
+ adaptation_factors = np.empty_like(stage_thresholds)
140
+ for band_idx in range(num_bands):
141
+ for stage_idx in range(5):
142
+ adaptation_factors[stage_idx] = stage_thresholds[stage_idx]
143
+ for sample_idx in range(num_samples):
144
+ current_value = subband_signals[band_idx, sample_idx]
145
+ if current_value < absolute_hearing_threshold:
146
+ current_value = absolute_hearing_threshold
147
+
148
+ for stage_idx in range(5):
149
+ gain_value = stage_gains[stage_idx]
150
+ threshold_value = stage_thresholds[stage_idx]
151
+ active_factor = adaptation_factors[stage_idx]
152
+ compressed_value = current_value / active_factor
153
+
154
+ adaptation_factors[stage_idx] = max(
155
+ (1.0 - gain_value) * compressed_value + gain_value * active_factor,
156
+ threshold_value
157
+ )
158
+ current_value = compressed_value
159
+
160
+ output_signals[band_idx, sample_idx] = current_value
161
+
162
+ return output_signals
163
+
21
164
  except ImportError:
22
165
  _HAS_NUMBA = False
23
166
 
24
- if _HAS_NUMBA:
25
- @numba.jit(nopython=True, cache=True)
26
- def _numba_adaptation_loop(rx: np.ndarray, gain_val: float, sthresh: float, factor: np.ndarray) -> np.ndarray:
27
- num_bands, num_samples = rx.shape
167
+
168
+ # -----------------------------------------------------------------------------
169
+ # PURE PYTHON/SCIPY FALLBACKS
170
+ # -----------------------------------------------------------------------------
171
+ def _fallback_adaptation_loops(
172
+ subband_signals: np.ndarray,
173
+ sampling_frequency_hz: float,
174
+ adaptation_bandwidths: np.ndarray,
175
+ absolute_hearing_threshold: float
176
+ ) -> np.ndarray:
177
+ """
178
+ Pure NumPy fallback for the nonlinear adaptation loops.
179
+ Vectorizes across the frequency bands to mitigate Python loop overhead.
180
+ """
181
+ num_samples = subband_signals.shape[1]
182
+ adapted_signals = np.maximum(subband_signals, absolute_hearing_threshold)
183
+ stage_threshold = absolute_hearing_threshold
184
+
185
+ for stage_idx in range(5):
186
+ adaptation_gain = math.exp(-math.pi * adaptation_bandwidths[stage_idx] / sampling_frequency_hz)
187
+ stage_threshold = math.sqrt(stage_threshold)
188
+ divisor_factors = np.full(subband_signals.shape[0], stage_threshold, dtype=np.float64)
189
+
28
190
  for sample_idx in range(num_samples):
29
- for band_idx in range(num_bands):
30
- val = rx[band_idx, sample_idx] / factor[band_idx]
31
- rx[band_idx, sample_idx] = val
32
- factor[band_idx] = max((1.0 - gain_val) * val + gain_val * factor[band_idx], sthresh)
33
- return rx
191
+ current_values = adapted_signals[:, sample_idx] / divisor_factors
192
+ adapted_signals[:, sample_idx] = current_values
34
193
 
194
+ divisor_factors = np.maximum(
195
+ (1.0 - adaptation_gain) * current_values + adaptation_gain * divisor_factors,
196
+ stage_threshold
197
+ )
35
198
 
36
- def haircell_transduction(subband_signals: np.ndarray, sampling_frequency: float) -> np.ndarray:
37
- """
38
- Models the nonlinear mechanical-to-neural transduction of the inner hair cells.
39
- Replaces haircell.c MEX script [2, 3].
199
+ return adapted_signals
40
200
 
41
- Stages:
42
- 1. Half-wave rectification (simulates unidirectional shearing of hair bundle)
43
- 2. 1 kHz first-order lowpass filter (simulates inner hair cell membrane limits)
44
- """
45
- # % gain=exp(-pi*2000/fs);
46
- # % rx=filter(1-gain,[1 -gain],max(rx,0),[],2);
47
- gain_haircell = np.exp(-np.pi * 2000.0 / sampling_frequency)
48
- b_hc = np.array([1.0 - gain_haircell])
49
- a_hc = np.array([1.0, -gain_haircell])
50
201
 
51
- # Process rectified signals over the sample dimension (axis 1)
202
+ def _fallback_fused_auditory_kernel(
203
+ subband_signals: np.ndarray,
204
+ sampling_frequency_hz: float,
205
+ haircell_filter_gain: float,
206
+ adaptation_bandwidths: np.ndarray,
207
+ absolute_hearing_threshold: float
208
+ ) -> np.ndarray:
209
+ """
210
+ Pure SciPy/NumPy fallback executing identical math utilizing C-backends.
211
+ """
212
+ # 1. Half-wave rectification
52
213
  rectified_signals = np.maximum(subband_signals, 0.0)
53
- return signal.lfilter(b_hc, a_hc, rectified_signals, axis=1)
54
214
 
215
+ # 2. Haircell 1 kHz first-order lowpass filter
216
+ numerator_coefficients = np.array([1.0 - haircell_filter_gain])
217
+ denominator_coefficients = np.array([1.0, -haircell_filter_gain])
218
+ transduced_signals = signal.lfilter(numerator_coefficients, denominator_coefficients, rectified_signals, axis=-1)
55
219
 
56
- def adaptation_loops(subband_signals: np.ndarray, sampling_frequency: float) -> np.ndarray:
57
- """
58
- Simulates the physiological adaptive properties of the auditory nerve.
59
- Replaces adapt.c MEX script [2].
220
+ return _fallback_adaptation_loops(
221
+ transduced_signals, sampling_frequency_hz, adaptation_bandwidths, absolute_hearing_threshold
222
+ )
60
223
 
61
- Runs 5 consecutive non-linear feedback loops modeling forward masking,
62
- vectorized across all bands for optimal execution in Python.
63
- """
64
- dbrange = 100.0
65
- thresh = 10.0 ** (-dbrange / 20.0)
66
- bw_loop = 1.0 / (np.pi * np.array([0.005, 0.05, 0.129, 0.253, 0.5]))
67
224
 
68
- # % rx=max(single(rx),thresh);
69
- rx = np.maximum(subband_signals.astype(np.float32), thresh)
70
- num_bands, num_samples = rx.shape
225
+ # -----------------------------------------------------------------------------
226
+ # EXPOSED API (STRICT PEP-484 TYPING)
227
+ # -----------------------------------------------------------------------------
228
+ def simulate_inner_haircell_transduction(
229
+ subband_signals: np.ndarray,
230
+ sampling_frequency_hz: float
231
+ ) -> np.ndarray:
232
+ """Models the nonlinear mechanical-to-neural transduction of the inner hair cells."""
233
+ if _HAS_NUMBA:
234
+ return _numba_haircell_transduction_kernel(subband_signals, sampling_frequency_hz)
235
+ else:
236
+ rectified_signals = np.maximum(subband_signals, 0.0)
237
+ haircell_filter_gain = math.exp(-math.pi * 2000.0 / sampling_frequency_hz)
238
+ numerator_coefficients = np.array([1.0 - haircell_filter_gain])
239
+ denominator_coefficients = np.array([1.0, -haircell_filter_gain])
240
+ return signal.lfilter(numerator_coefficients, denominator_coefficients, rectified_signals, axis=-1)
71
241
 
72
- # Process each of the 5 adaptive stages
73
- sthresh = thresh
74
- for stage_idx in range(5):
75
- gain_val = np.exp(-np.pi * bw_loop[stage_idx] / sampling_frequency)
76
- sthresh = np.sqrt(sthresh)
77
- factor = np.full(num_bands, sthresh, dtype=np.float32) # divisor factor for each band
78
-
79
- if _HAS_NUMBA:
80
- # Compiled loop executing at native C speeds
81
- rx = _numba_adaptation_loop(rx, float(gain_val), float(sthresh), factor)
82
- else:
83
- # Fallback pure-Python loop
84
- for sample_idx in range(num_samples):
85
- # Divide current sample by current divisor factor
86
- val = rx[:, sample_idx] / factor
87
- rx[:, sample_idx] = val
88
- # Update divisor filter state
89
- factor = np.maximum((1.0 - gain_val) * val + gain_val * factor, sthresh)
90
242
 
91
- # % rx=double(dbrange/(1-sthresh))*(double(rx)-double(sthresh));
92
- return (dbrange / (1.0 - sthresh)) * (rx - sthresh)
243
+ def simulate_auditory_nerve_adaptation(
244
+ subband_signals: np.ndarray,
245
+ sampling_frequency_hz: float
246
+ ) -> np.ndarray:
247
+ """Simulates the physiological adaptive properties of the auditory nerve."""
248
+ decibel_range = 100.0
249
+ absolute_hearing_threshold = 10.0 ** (-decibel_range / 20.0)
250
+ adaptation_loop_bandwidths = 1.0 / (np.pi * np.array([0.005, 0.05, 0.129, 0.253, 0.5]))
251
+
252
+ if _HAS_NUMBA:
253
+ adapted_signals = _numba_adaptation_loops_kernel(
254
+ subband_signals, sampling_frequency_hz, adaptation_loop_bandwidths, absolute_hearing_threshold
255
+ )
256
+ else:
257
+ adapted_signals = _fallback_adaptation_loops(
258
+ subband_signals, sampling_frequency_hz, adaptation_loop_bandwidths, absolute_hearing_threshold
259
+ )
260
+
261
+ final_threshold = absolute_hearing_threshold
262
+ for _ in range(5):
263
+ final_threshold = math.sqrt(final_threshold)
264
+
265
+ return (decibel_range / (1.0 - final_threshold)) * (adapted_signals - final_threshold)
93
266
 
94
267
 
95
- def generate_internal_representation(
268
+ def generate_auditory_internal_representation(
96
269
  signal_data: np.ndarray,
97
- sampling_frequency: float,
98
- modulation_processing_type: str = 'lp'
270
+ sampling_frequency_hz: float,
271
+ modulation_processing_type: ModulationProcessingType = ModulationProcessingType.LOWPASS
99
272
  ) -> Tuple[np.ndarray, float]:
100
- """
101
- Generates the 3D internal auditory representation of a signal.
102
- Equivalent of pemo_internal.m [1].
103
- """
273
+ """Generates the 3D internal auditory representation of a signal."""
104
274
  if len(signal_data.shape) > 1:
105
275
  if signal_data.shape[0] < signal_data.shape[1]:
106
276
  signal_data = signal_data.T
107
277
  signal_data = signal_data.ravel()
108
278
 
109
279
  # Model input scaling (1.0 becomes 100 dB SPL)
110
- signal_data = 10.0 * signal_data
111
-
112
- # Frequency analysis boundaries
113
- fmin = 235.0
114
- fmax = min(0.5 * sampling_frequency, 14500.0)
115
- if sampling_frequency < 3.0 * fmax:
116
- new_fs = int(round(1.5 * sampling_frequency))
117
- signal_data = signal_data.astype(float)
118
- signal_data = signal.resample(signal_data, int(round(len(signal_data) * new_fs / sampling_frequency)))
119
- sampling_frequency = float(new_fs)
120
-
121
- analyzer = GammatoneAnalyzer(sampling_frequency, fmin, 1000.0, fmax, 1.0)
122
- num_bands = len(analyzer.filters)
123
-
124
- # Subband analysis
125
- subbands = np.real(analyzer.process(signal_data))
126
-
127
- # Transduction and Adaptation stages
128
- transduced = haircell_transduction(subbands, sampling_frequency)
129
- adapted = adaptation_loops(transduced, sampling_frequency)
130
-
131
- # Modulation Filtering & Downsampling
132
- if modulation_processing_type == 'fb':
133
- adapted = signal.resample(adapted, int(round(adapted.shape[1] * 800.0 / sampling_frequency)), axis=1)
134
- sampling_frequency = 800.0
135
- center_frequencies_mod = np.concatenate(([0.0, 5.0], 10.0 * (5.0 / 3.0) ** np.arange(6)))
136
- bandwidth_mod = np.concatenate(([5.0, 5.0], 5.0 * (5.0 / 3.0) ** np.arange(6)))
280
+ scaled_signal_data = 10.0 * signal_data
281
+
282
+ minimum_frequency = 235.0
283
+ maximum_frequency = min(0.5 * sampling_frequency_hz, 14500.0)
284
+
285
+ # Upsample by 1.5x using polyphase FIR resampling (avoids global FFT memory spikes)
286
+ if sampling_frequency_hz < 3.0 * maximum_frequency:
287
+ new_sampling_frequency = int(round(1.5 * sampling_frequency_hz))
288
+ # scaled_signal_data = signal.resample_poly(scaled_signal_data, new_sampling_frequency, int(sampling_frequency_hz))
289
+ scaled_signal_data = fast_resample_poly(
290
+ scaled_signal_data, new_sampling_frequency, int(sampling_frequency_hz)
291
+ )
292
+ sampling_frequency_hz = float(new_sampling_frequency)
293
+
294
+ # 1. Gammatone Analysis Filterbank
295
+ analyzer = GammatoneAnalyzer(sampling_frequency_hz, minimum_frequency, 1000.0, maximum_frequency, 1.0)
296
+ subbands = np.real(analyzer.process(scaled_signal_data))
297
+
298
+ # 2 & 3. Fused IHC Transduction and Nerve Adaptation
299
+ haircell_filter_gain = math.exp(-math.pi * 2000.0 / sampling_frequency_hz)
300
+ decibel_range = 100.0
301
+ absolute_hearing_threshold = 10.0 ** (-decibel_range / 20.0)
302
+ adaptation_loop_bandwidths = 1.0 / (np.pi * np.array([0.005, 0.05, 0.129, 0.253, 0.5]))
303
+
304
+ if _HAS_NUMBA:
305
+ adapted_signals = _numba_fused_auditory_kernel(
306
+ subbands, sampling_frequency_hz, haircell_filter_gain,
307
+ adaptation_loop_bandwidths, absolute_hearing_threshold
308
+ )
137
309
  else:
138
- adapted = signal.resample(adapted, int(round(adapted.shape[1] * 100.0 / sampling_frequency)), axis=1)
139
- sampling_frequency = 100.0
140
- center_frequencies_mod = np.array([0.0])
141
- bandwidth_mod = np.array([15.92])
310
+ adapted_signals = _fallback_fused_auditory_kernel(
311
+ subbands, sampling_frequency_hz, haircell_filter_gain,
312
+ adaptation_loop_bandwidths, absolute_hearing_threshold
313
+ )
314
+
315
+ # Global dB offset scaling
316
+ final_threshold = absolute_hearing_threshold
317
+ for _ in range(5):
318
+ final_threshold = math.sqrt(final_threshold)
319
+
320
+ adapted_signals = (decibel_range / (1.0 - final_threshold)) * (adapted_signals - final_threshold)
321
+
322
+ # 4. Modulation Filtering & Polyphase Decimation
323
+ if modulation_processing_type == ModulationProcessingType.FILTERBANK:
324
+ # downsampled_adapted = signal.resample_poly(adapted_signals, 800, int(sampling_frequency_hz), axis=-1)
325
+ downsampled_adapted = fast_resample_poly(adapted_signals, 800, int(sampling_frequency_hz), axis=-1)
326
+ sampling_frequency_hz = 800.0
327
+ modulation_center_frequencies = np.concatenate(([0.0, 5.0], 10.0 * (5.0 / 3.0) ** np.arange(6)))
328
+ modulation_bandwidths = np.concatenate(([5.0, 5.0], 5.0 * (5.0 / 3.0) ** np.arange(6)))
329
+ else:
330
+ # downsampled_adapted = signal.resample_poly(adapted_signals, 100, int(sampling_frequency_hz), axis=-1)
331
+ downsampled_adapted = fast_resample_poly(adapted_signals, 100, int(sampling_frequency_hz), axis=-1)
332
+ sampling_frequency_hz = 100.0
333
+ modulation_center_frequencies = np.array([0.0])
334
+ modulation_bandwidths = np.array([15.92])
335
+
336
+ num_bands = adapted_signals.shape[0]
337
+ num_modulations = len(modulation_center_frequencies)
338
+ num_samples = downsampled_adapted.shape[1]
142
339
 
143
- num_modulations = len(center_frequencies_mod)
144
- num_samples = adapted.shape[1]
145
340
  internal_representation = np.zeros((num_bands, num_samples, num_modulations), dtype=complex)
146
341
 
147
- for m in range(num_modulations):
148
- gain_val = np.exp(-np.pi * bandwidth_mod[m] / sampling_frequency)
149
- b_mod = np.array([1.0 - gain_val])
150
- a_mod = np.array([1.0, -gain_val * np.exp(2j * np.pi * center_frequencies_mod[m] / sampling_frequency)])
151
- internal_representation[:, :, m] = signal.lfilter(b_mod, a_mod, adapted, axis=1)
342
+ for mod_idx in range(num_modulations):
343
+ filter_gain = math.exp(-math.pi * modulation_bandwidths[mod_idx] / sampling_frequency_hz)
344
+ numerator_coeffs = np.array([1.0 - filter_gain])
345
+ denominator_coeffs = np.array([
346
+ 1.0,
347
+ -filter_gain * np.exp(2j * np.pi * modulation_center_frequencies[mod_idx] / sampling_frequency_hz)
348
+ ])
349
+
350
+ # Offloaded to SciPy C-backend
351
+ internal_representation[:, :, mod_idx] = signal.lfilter(
352
+ numerator_coeffs, denominator_coeffs, downsampled_adapted, axis=-1
353
+ )
152
354
 
153
- # Hilbert envelope extraction above 10 Hz
154
- above_10_hz = (center_frequencies_mod > 10.0)
155
- internal_representation[:, :, ~above_10_hz] = np.real(internal_representation[:, :, ~above_10_hz])
156
- internal_representation[:, :, above_10_hz] = np.abs(internal_representation[:, :, above_10_hz])
355
+ channels_above_10_hz = (modulation_center_frequencies > 10.0)
356
+ internal_representation[:, :, ~channels_above_10_hz] = np.real(
357
+ internal_representation[:, :, ~channels_above_10_hz]
358
+ )
359
+ internal_representation[:, :, channels_above_10_hz] = np.abs(
360
+ internal_representation[:, :, channels_above_10_hz]
361
+ )
157
362
 
158
- return internal_representation, sampling_frequency
363
+ # Cast to real float64 since all imaginary parts have been discarded
364
+ return np.real(internal_representation), sampling_frequency_hz
peass/config.py ADDED
@@ -0,0 +1,73 @@
1
+ """
2
+ PEASS Configuration and Data Structures
3
+ """
4
+
5
+ import sys
6
+ from dataclasses import dataclass
7
+ from enum import Enum
8
+ from enum import auto
9
+ from typing import Optional
10
+
11
+ import numpy as np
12
+
13
+ # Dynamically enable slots only on Python 3.10+
14
+ # TODO: drop py3.9 support and just always include slots
15
+ _DATACLASS_KWARGS = {"slots": True} if sys.version_info >= (3, 10) else {}
16
+
17
+
18
+ class ModulationProcessingType(Enum):
19
+ """Defines the type of modulation processing used in the auditory model."""
20
+ LOWPASS = auto()
21
+ FILTERBANK = auto()
22
+
23
+
24
+ @dataclass(**_DATACLASS_KWARGS)
25
+ class DecomposedWaveforms:
26
+ """Holds the in-memory NumPy arrays for the decomposed physical components."""
27
+ true_target: np.ndarray
28
+ target_distortion: np.ndarray
29
+ interference: np.ndarray
30
+ artifacts: np.ndarray
31
+
32
+
33
+ @dataclass(**_DATACLASS_KWARGS)
34
+ class DecomposedFilePaths:
35
+ """Holds the absolute file paths to the generated WAV files on disk."""
36
+ true_target: str
37
+ target_distortion: str
38
+ interference: str
39
+ artifacts: str
40
+
41
+
42
+ @dataclass(**_DATACLASS_KWARGS)
43
+ class DecompositionResult:
44
+ """Wrapper holding both the arrays and optional file paths of a decomposition."""
45
+ waveforms: DecomposedWaveforms
46
+ file_paths: Optional[DecomposedFilePaths] = None
47
+
48
+
49
+ @dataclass(**_DATACLASS_KWARGS)
50
+ class DecompositionConfiguration:
51
+ """Structural configurations for the subband least-squares windowing."""
52
+ destination_directory: str = "./"
53
+ use_two_stage_projection: bool = False
54
+ frame_length_seconds: float = 0.5
55
+ filter_length_seconds: float = 0.04
56
+ shade_in_milliseconds: float = 10.0
57
+ shade_out_milliseconds: float = 10.0
58
+ segmentation_factor: int = 1
59
+
60
+
61
+ @dataclass(**_DATACLASS_KWARGS)
62
+ class PerceptualSeparationScores:
63
+ """Final assessment metrics representing the predicted subjective evaluation."""
64
+ overall_perceptual_score: float
65
+ target_perceptual_score: float
66
+ interference_perceptual_score: float
67
+ artifact_perceptual_score: float
68
+ source_to_distortion_ratio: float
69
+ source_to_spatial_distortion_ratio: float
70
+ source_to_interference_ratio: float
71
+ source_to_artifacts_ratio: float
72
+ decomposition_waveforms: Optional[DecomposedWaveforms] = None
73
+ decomposition_files: Optional[DecomposedFilePaths] = None