python-peass 2.0.1.2__tar.gz → 2.0.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {python_peass-2.0.1.2 → python_peass-2.0.1.3}/PKG-INFO +37 -35
- {python_peass-2.0.1.2 → python_peass-2.0.1.3}/README.md +35 -32
- {python_peass-2.0.1.2 → python_peass-2.0.1.3}/peass/__init__.py +1 -1
- {python_peass-2.0.1.2 → python_peass-2.0.1.3}/peass/auditory_model.py +4 -6
- {python_peass-2.0.1.2 → python_peass-2.0.1.3}/peass/config.py +8 -14
- {python_peass-2.0.1.2 → python_peass-2.0.1.3}/peass/decomposition.py +9 -13
- {python_peass-2.0.1.2 → python_peass-2.0.1.3}/peass/gammatone.py +1 -2
- {python_peass-2.0.1.2 → python_peass-2.0.1.3}/peass/metrics.py +4 -6
- {python_peass-2.0.1.2 → python_peass-2.0.1.3}/peass/predictor.py +5 -8
- {python_peass-2.0.1.2 → python_peass-2.0.1.3}/pyproject.toml +1 -2
- {python_peass-2.0.1.2 → python_peass-2.0.1.3}/LICENSE +0 -0
- {python_peass-2.0.1.2 → python_peass-2.0.1.3}/peass/parameters/paramTask1.npz +0 -0
- {python_peass-2.0.1.2 → python_peass-2.0.1.3}/peass/parameters/paramTask2.npz +0 -0
- {python_peass-2.0.1.2 → python_peass-2.0.1.3}/peass/parameters/paramTask3.npz +0 -0
- {python_peass-2.0.1.2 → python_peass-2.0.1.3}/peass/parameters/paramTask4.npz +0 -0
|
@@ -1,15 +1,14 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: python-peass
|
|
3
|
-
Version: 2.0.1.2
|
|
3
|
+
Version: 2.0.1.3
|
|
4
4
|
Summary: python-peass: Perceptual Evaluation methods for Audio Source Separation
|
|
5
5
|
Author-email: Avery Khoo <avery.khoo@gmail.com>
|
|
6
|
-
Requires-Python: >=3.9
|
|
6
|
+
Requires-Python: >=3.10
|
|
7
7
|
Description-Content-Type: text/markdown
|
|
8
8
|
Classifier: Development Status :: 4 - Beta
|
|
9
9
|
Classifier: Intended Audience :: Science/Research
|
|
10
10
|
Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
|
|
11
11
|
Classifier: Programming Language :: Python :: 3
|
|
12
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
13
12
|
Classifier: Programming Language :: Python :: 3.10
|
|
14
13
|
Classifier: Programming Language :: Python :: 3.11
|
|
15
14
|
Classifier: Programming Language :: Python :: 3.12
|
|
@@ -29,7 +28,7 @@ Provides-Extra: numba
|
|
|
29
28
|
|
|
30
29
|
# python-peass
|
|
31
30
|
|
|
32
|
-
[](https://github.com/averykhoo/python-peass/actions)
|
|
33
32
|
[](https://pypi.org/project/python-peass/)
|
|
34
33
|
|
|
35
34
|
> This project was ported by Gemini 3.5 Flash from
|
|
@@ -37,32 +36,6 @@ Provides-Extra: numba
|
|
|
37
36
|
|
|
38
37
|
A Python port of the **PEASS v2.0.1** (Perceptual Evaluation methods for Audio Source Separation) toolkit [1].
|
|
39
38
|
|
|
40
|
-
This package replaces traditional energy ratio metrics (SDR, SIR, SAR) with perceptually motivated objective scores—
|
|
41
|
-
**OPS, TPS, IPS, and APS**—which align closely with subjective human listening evaluations [1].
|
|
42
|
-
|
|
43
|
-
## Scientific Highlights
|
|
44
|
-
|
|
45
|
-
Traditional evaluation metrics rely purely on linear energy ratios [1].
|
|
46
|
-
However, human hearing relies on non-linear auditory transduction, temporal masking, and cognitive thresholds [2].
|
|
47
|
-
`python-peass` executes a multi-stage cognitive simulation pipeline to assess separation quality:
|
|
48
|
-
|
|
49
|
-
1. **Subband Least-Squares Decomposition:**
|
|
50
|
-
Signals are divided into subbands using a complex-valued Hohmann Gammatone Filterbank [1, 3].
|
|
51
|
-
Overlapping temporal frames are projected onto estimated subspaces to isolate physical target distortion,
|
|
52
|
-
interference, and artifact components [1].
|
|
53
|
-
2. **Inner Hair Cell Transduction:**
|
|
54
|
-
Approximates the shearing limits of physical hair bundles via half-wave rectification and first-order 1 kHz
|
|
55
|
-
membrane-limit lowpass filters [1, 2].
|
|
56
|
-
3. **Auditory Nerve Adaptation:**
|
|
57
|
-
Models physiological forward masking and metabolic neural depletion via five cascaded stages of non-linear feedback
|
|
58
|
-
loops [2].
|
|
59
|
-
4. **Perceptual Assimilation:**
|
|
60
|
-
Models cognitive threshold masking where noise below a target reference threshold is partially assimilated or
|
|
61
|
-
masked [2].
|
|
62
|
-
5. **Score Prediction:**
|
|
63
|
-
Feeds weighted similarity percentiles into a multi-criteria trained sigmoidal neural network to output scores scaled
|
|
64
|
-
from `0` to `100` [1].
|
|
65
|
-
|
|
66
39
|
## Installation
|
|
67
40
|
|
|
68
41
|
For standard execution, you can install the package directly:
|
|
@@ -71,10 +44,11 @@ For standard execution, you can install the package directly:
|
|
|
71
44
|
pip install "python-peass[numba]"
|
|
72
45
|
```
|
|
73
46
|
|
|
74
|
-
If you require high-speed execution (using optimized vector libraries like Intel MKL or Apple Accelerate), it is
|
|
47
|
+
If you require high-speed execution (using optimized vector libraries like Intel MKL or Apple Accelerate), it is
|
|
48
|
+
recommended to install NumPy and SciPy via Conda first, and then install the package:
|
|
75
49
|
|
|
76
50
|
```bash
|
|
77
|
-
conda install
|
|
51
|
+
conda install numpy scipy
|
|
78
52
|
pip install "python-peass[numba]"
|
|
79
53
|
```
|
|
80
54
|
|
|
@@ -96,10 +70,10 @@ estimate_file = "audio/estimated_target.wav"
|
|
|
96
70
|
|
|
97
71
|
scores = predict_perceptual_evaluation_scores(original_files, estimate_file)
|
|
98
72
|
|
|
99
|
-
print(f"Overall Perceptual Score (OPS):
|
|
73
|
+
print(f"Overall Perceptual Score (OPS): {scores.overall_perceptual_score:.1f}/100")
|
|
100
74
|
print(f"Target Preservation Score (TPS): {scores.target_perceptual_score:.1f}/100")
|
|
101
|
-
print(f"Interference Rejection (IPS):
|
|
102
|
-
print(f"Artifact-free Score (APS):
|
|
75
|
+
print(f"Interference Rejection (IPS): {scores.interference_perceptual_score:.1f}/100")
|
|
76
|
+
print(f"Artifact-free Score (APS): {scores.artifact_perceptual_score:.1f}/100")
|
|
103
77
|
```
|
|
104
78
|
|
|
105
79
|
### 2. Score Evaluation with Waveform and File Expositions
|
|
@@ -171,6 +145,34 @@ true_target, target_distortion, interference, artifacts = (
|
|
|
171
145
|
|
|
172
146
|
---
|
|
173
147
|
|
|
148
|
+
## Scientific Highlights
|
|
149
|
+
|
|
150
|
+
Traditional evaluation metrics rely purely on linear energy ratios [1].
|
|
151
|
+
However, human hearing relies on non-linear auditory transduction, temporal masking, and cognitive thresholds [2].
|
|
152
|
+
This package replaces traditional energy ratio metrics (SDR, SIR, SAR) with perceptually motivated objective scores—
|
|
153
|
+
**OPS, TPS, IPS, and APS**—which align closely with subjective human listening evaluations [1].
|
|
154
|
+
|
|
155
|
+
`peass` executes a multi-stage cognitive simulation pipeline to assess separation quality:
|
|
156
|
+
|
|
157
|
+
1. **Subband Least-Squares Decomposition:**
|
|
158
|
+
Signals are divided into subbands using a complex-valued Hohmann Gammatone Filterbank [1, 3].
|
|
159
|
+
Overlapping temporal frames are projected onto estimated subspaces to isolate physical target distortion,
|
|
160
|
+
interference, and artifact components [1].
|
|
161
|
+
2. **Inner Hair Cell Transduction:**
|
|
162
|
+
Approximates the shearing limits of physical hair bundles via half-wave rectification and first-order 1 kHz
|
|
163
|
+
membrane-limit lowpass filters [1, 2].
|
|
164
|
+
3. **Auditory Nerve Adaptation:**
|
|
165
|
+
Models physiological forward masking and metabolic neural depletion via five cascaded stages of non-linear feedback
|
|
166
|
+
loops [2].
|
|
167
|
+
4. **Perceptual Assimilation:**
|
|
168
|
+
Models cognitive threshold masking where noise below a target reference threshold is partially assimilated or
|
|
169
|
+
masked [2].
|
|
170
|
+
5. **Score Prediction:**
|
|
171
|
+
Feeds weighted similarity percentiles into a multi-criteria trained sigmoidal neural network to output scores scaled
|
|
172
|
+
from `0` to `100` [1].
|
|
173
|
+
|
|
174
|
+
---
|
|
175
|
+
|
|
174
176
|
## Test Suite & CI/CD
|
|
175
177
|
|
|
176
178
|
The validation suite implements rigorous numerical, physical, and integration checks.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# python-peass
|
|
2
2
|
|
|
3
|
-
[](https://github.com/averykhoo/python-peass/actions)
|
|
4
4
|
[](https://pypi.org/project/python-peass/)
|
|
5
5
|
|
|
6
6
|
> This project was ported by Gemini 3.5 Flash from
|
|
@@ -8,32 +8,6 @@
|
|
|
8
8
|
|
|
9
9
|
A Python port of the **PEASS v2.0.1** (Perceptual Evaluation methods for Audio Source Separation) toolkit [1].
|
|
10
10
|
|
|
11
|
-
This package replaces traditional energy ratio metrics (SDR, SIR, SAR) with perceptually motivated objective scores—
|
|
12
|
-
**OPS, TPS, IPS, and APS**—which align closely with subjective human listening evaluations [1].
|
|
13
|
-
|
|
14
|
-
## Scientific Highlights
|
|
15
|
-
|
|
16
|
-
Traditional evaluation metrics rely purely on linear energy ratios [1].
|
|
17
|
-
However, human hearing relies on non-linear auditory transduction, temporal masking, and cognitive thresholds [2].
|
|
18
|
-
`python-peass` executes a multi-stage cognitive simulation pipeline to assess separation quality:
|
|
19
|
-
|
|
20
|
-
1. **Subband Least-Squares Decomposition:**
|
|
21
|
-
Signals are divided into subbands using a complex-valued Hohmann Gammatone Filterbank [1, 3].
|
|
22
|
-
Overlapping temporal frames are projected onto estimated subspaces to isolate physical target distortion,
|
|
23
|
-
interference, and artifact components [1].
|
|
24
|
-
2. **Inner Hair Cell Transduction:**
|
|
25
|
-
Approximates the shearing limits of physical hair bundles via half-wave rectification and first-order 1 kHz
|
|
26
|
-
membrane-limit lowpass filters [1, 2].
|
|
27
|
-
3. **Auditory Nerve Adaptation:**
|
|
28
|
-
Models physiological forward masking and metabolic neural depletion via five cascaded stages of non-linear feedback
|
|
29
|
-
loops [2].
|
|
30
|
-
4. **Perceptual Assimilation:**
|
|
31
|
-
Models cognitive threshold masking where noise below a target reference threshold is partially assimilated or
|
|
32
|
-
masked [2].
|
|
33
|
-
5. **Score Prediction:**
|
|
34
|
-
Feeds weighted similarity percentiles into a multi-criteria trained sigmoidal neural network to output scores scaled
|
|
35
|
-
from `0` to `100` [1].
|
|
36
|
-
|
|
37
11
|
## Installation
|
|
38
12
|
|
|
39
13
|
For standard execution, you can install the package directly:
|
|
@@ -42,10 +16,11 @@ For standard execution, you can install the package directly:
|
|
|
42
16
|
pip install "python-peass[numba]"
|
|
43
17
|
```
|
|
44
18
|
|
|
45
|
-
If you require high-speed execution (using optimized vector libraries like Intel MKL or Apple Accelerate), it is
|
|
19
|
+
If you require high-speed execution (using optimized vector libraries like Intel MKL or Apple Accelerate), it is
|
|
20
|
+
recommended to install NumPy and SciPy via Conda first, and then install the package:
|
|
46
21
|
|
|
47
22
|
```bash
|
|
48
|
-
conda install
|
|
23
|
+
conda install numpy scipy
|
|
49
24
|
pip install "python-peass[numba]"
|
|
50
25
|
```
|
|
51
26
|
|
|
@@ -67,10 +42,10 @@ estimate_file = "audio/estimated_target.wav"
|
|
|
67
42
|
|
|
68
43
|
scores = predict_perceptual_evaluation_scores(original_files, estimate_file)
|
|
69
44
|
|
|
70
|
-
print(f"Overall Perceptual Score (OPS):
|
|
45
|
+
print(f"Overall Perceptual Score (OPS): {scores.overall_perceptual_score:.1f}/100")
|
|
71
46
|
print(f"Target Preservation Score (TPS): {scores.target_perceptual_score:.1f}/100")
|
|
72
|
-
print(f"Interference Rejection (IPS):
|
|
73
|
-
print(f"Artifact-free Score (APS):
|
|
47
|
+
print(f"Interference Rejection (IPS): {scores.interference_perceptual_score:.1f}/100")
|
|
48
|
+
print(f"Artifact-free Score (APS): {scores.artifact_perceptual_score:.1f}/100")
|
|
74
49
|
```
|
|
75
50
|
|
|
76
51
|
### 2. Score Evaluation with Waveform and File Expositions
|
|
@@ -142,6 +117,34 @@ true_target, target_distortion, interference, artifacts = (
|
|
|
142
117
|
|
|
143
118
|
---
|
|
144
119
|
|
|
120
|
+
## Scientific Highlights
|
|
121
|
+
|
|
122
|
+
Traditional evaluation metrics rely purely on linear energy ratios [1].
|
|
123
|
+
However, human hearing relies on non-linear auditory transduction, temporal masking, and cognitive thresholds [2].
|
|
124
|
+
This package replaces traditional energy ratio metrics (SDR, SIR, SAR) with perceptually motivated objective scores—
|
|
125
|
+
**OPS, TPS, IPS, and APS**—which align closely with subjective human listening evaluations [1].
|
|
126
|
+
|
|
127
|
+
`peass` executes a multi-stage cognitive simulation pipeline to assess separation quality:
|
|
128
|
+
|
|
129
|
+
1. **Subband Least-Squares Decomposition:**
|
|
130
|
+
Signals are divided into subbands using a complex-valued Hohmann Gammatone Filterbank [1, 3].
|
|
131
|
+
Overlapping temporal frames are projected onto estimated subspaces to isolate physical target distortion,
|
|
132
|
+
interference, and artifact components [1].
|
|
133
|
+
2. **Inner Hair Cell Transduction:**
|
|
134
|
+
Approximates the shearing limits of physical hair bundles via half-wave rectification and first-order 1 kHz
|
|
135
|
+
membrane-limit lowpass filters [1, 2].
|
|
136
|
+
3. **Auditory Nerve Adaptation:**
|
|
137
|
+
Models physiological forward masking and metabolic neural depletion via five cascaded stages of non-linear feedback
|
|
138
|
+
loops [2].
|
|
139
|
+
4. **Perceptual Assimilation:**
|
|
140
|
+
Models cognitive threshold masking where noise below a target reference threshold is partially assimilated or
|
|
141
|
+
masked [2].
|
|
142
|
+
5. **Score Prediction:**
|
|
143
|
+
Feeds weighted similarity percentiles into a multi-criteria trained sigmoidal neural network to output scores scaled
|
|
144
|
+
from `0` to `100` [1].
|
|
145
|
+
|
|
146
|
+
---
|
|
147
|
+
|
|
145
148
|
## Test Suite & CI/CD
|
|
146
149
|
|
|
147
150
|
The validation suite implements rigorous numerical, physical, and integration checks.
|
|
@@ -3,7 +3,7 @@ python-peass: Perceptual Evaluation methods for Audio Source Separation
|
|
|
3
3
|
A modern, Pythonic port of the PEASS v2.0.1 toolkit.
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
|
-
__version__ = "2.0.1.2"
|
|
6
|
+
__version__ = "2.0.1.3" # matches peass version, with one more segment for me to edit
|
|
7
7
|
|
|
8
8
|
from .config import DecomposedFilePaths
|
|
9
9
|
from .config import DecomposedWaveforms
|
|
@@ -7,7 +7,6 @@ and fails over gracefully to a SciPy/NumPy native vectorization.
|
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
9
|
import math
|
|
10
|
-
from typing import Tuple
|
|
11
10
|
|
|
12
11
|
import numpy as np
|
|
13
12
|
import scipy.signal as signal
|
|
@@ -269,7 +268,7 @@ def generate_auditory_internal_representation(
|
|
|
269
268
|
signal_data: np.ndarray,
|
|
270
269
|
sampling_frequency_hz: float,
|
|
271
270
|
modulation_processing_type: ModulationProcessingType = ModulationProcessingType.LOWPASS
|
|
272
|
-
) -> Tuple[np.ndarray, float]:
|
|
271
|
+
) -> tuple[np.ndarray, float]:
|
|
273
272
|
"""Generates the 3D internal auditory representation of a signal."""
|
|
274
273
|
if len(signal_data.shape) > 1:
|
|
275
274
|
if signal_data.shape[0] < signal_data.shape[1]:
|
|
@@ -285,7 +284,6 @@ def generate_auditory_internal_representation(
|
|
|
285
284
|
# Decimate using polyphase FIR (avoids global FFT memory spikes)
|
|
286
285
|
if sampling_frequency_hz < 3.0 * maximum_frequency:
|
|
287
286
|
new_sampling_frequency = int(round(1.5 * sampling_frequency_hz))
|
|
288
|
-
# scaled_signal_data = signal.resample_poly(scaled_signal_data, new_sampling_frequency, int(sampling_frequency_hz))
|
|
289
287
|
scaled_signal_data = fast_resample_poly(
|
|
290
288
|
scaled_signal_data, new_sampling_frequency, int(sampling_frequency_hz)
|
|
291
289
|
)
|
|
@@ -321,17 +319,17 @@ def generate_auditory_internal_representation(
|
|
|
321
319
|
|
|
322
320
|
# 4. Modulation Filtering & Polyphase Decimation
|
|
323
321
|
if modulation_processing_type == ModulationProcessingType.FILTERBANK:
|
|
324
|
-
# downsampled_adapted = signal.resample_poly(adapted_signals, 800, int(sampling_frequency_hz), axis=-1)
|
|
325
322
|
downsampled_adapted = fast_resample_poly(adapted_signals, 800, int(sampling_frequency_hz), axis=-1)
|
|
326
323
|
sampling_frequency_hz = 800.0
|
|
327
324
|
modulation_center_frequencies = np.concatenate(([0.0, 5.0], 10.0 * (5.0 / 3.0) ** np.arange(6)))
|
|
328
325
|
modulation_bandwidths = np.concatenate(([5.0, 5.0], 5.0 * (5.0 / 3.0) ** np.arange(6)))
|
|
329
|
-
|
|
330
|
-
# downsampled_adapted = signal.resample_poly(adapted_signals, 100, int(sampling_frequency_hz), axis=-1)
|
|
326
|
+
elif modulation_processing_type == ModulationProcessingType.LOWPASS:
|
|
331
327
|
downsampled_adapted = fast_resample_poly(adapted_signals, 100, int(sampling_frequency_hz), axis=-1)
|
|
332
328
|
sampling_frequency_hz = 100.0
|
|
333
329
|
modulation_center_frequencies = np.array([0.0])
|
|
334
330
|
modulation_bandwidths = np.array([15.92])
|
|
331
|
+
else:
|
|
332
|
+
raise ValueError(f"Unknown {modulation_processing_type=}")
|
|
335
333
|
|
|
336
334
|
num_bands = adapted_signals.shape[0]
|
|
337
335
|
num_modulations = len(modulation_center_frequencies)
|
|
@@ -2,18 +2,12 @@
|
|
|
2
2
|
PEASS Configuration and Data Structures
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
|
-
import sys
|
|
6
5
|
from dataclasses import dataclass
|
|
7
6
|
from enum import Enum
|
|
8
7
|
from enum import auto
|
|
9
|
-
from typing import Optional
|
|
10
8
|
|
|
11
9
|
import numpy as np
|
|
12
10
|
|
|
13
|
-
# Dynamically enable slots only on Python 3.10+
|
|
14
|
-
# TODO: drop py3.9 support and just always include slots
|
|
15
|
-
_DATACLASS_KWARGS = {"slots": True} if sys.version_info >= (3, 10) else {}
|
|
16
|
-
|
|
17
11
|
|
|
18
12
|
class ModulationProcessingType(Enum):
|
|
19
13
|
"""Defines the type of modulation processing used in the auditory model."""
|
|
@@ -21,7 +15,7 @@ class ModulationProcessingType(Enum):
|
|
|
21
15
|
FILTERBANK = auto()
|
|
22
16
|
|
|
23
17
|
|
|
24
|
-
@dataclass(**_DATACLASS_KWARGS)
|
|
18
|
+
@dataclass(slots=True)
|
|
25
19
|
class DecomposedWaveforms:
|
|
26
20
|
"""Holds the in-memory NumPy arrays for the decomposed physical components."""
|
|
27
21
|
true_target: np.ndarray
|
|
@@ -30,7 +24,7 @@ class DecomposedWaveforms:
|
|
|
30
24
|
artifacts: np.ndarray
|
|
31
25
|
|
|
32
26
|
|
|
33
|
-
@dataclass(**_DATACLASS_KWARGS)
|
|
27
|
+
@dataclass(slots=True)
|
|
34
28
|
class DecomposedFilePaths:
|
|
35
29
|
"""Holds the absolute file paths to the generated WAV files on disk."""
|
|
36
30
|
true_target: str
|
|
@@ -39,14 +33,14 @@ class DecomposedFilePaths:
|
|
|
39
33
|
artifacts: str
|
|
40
34
|
|
|
41
35
|
|
|
42
|
-
@dataclass(**_DATACLASS_KWARGS)
|
|
36
|
+
@dataclass(slots=True)
|
|
43
37
|
class DecompositionResult:
|
|
44
38
|
"""Wrapper holding both the arrays and optional file paths of a decomposition."""
|
|
45
39
|
waveforms: DecomposedWaveforms
|
|
46
|
-
file_paths: Optional[DecomposedFilePaths] = None
|
|
40
|
+
file_paths: DecomposedFilePaths | None = None
|
|
47
41
|
|
|
48
42
|
|
|
49
|
-
@dataclass(**_DATACLASS_KWARGS)
|
|
43
|
+
@dataclass(slots=True)
|
|
50
44
|
class DecompositionConfiguration:
|
|
51
45
|
"""Structural configurations for the subband least-squares windowing."""
|
|
52
46
|
destination_directory: str = "./"
|
|
@@ -58,7 +52,7 @@ class DecompositionConfiguration:
|
|
|
58
52
|
segmentation_factor: int = 1
|
|
59
53
|
|
|
60
54
|
|
|
61
|
-
@dataclass(**_DATACLASS_KWARGS)
|
|
55
|
+
@dataclass(slots=True)
|
|
62
56
|
class PerceptualSeparationScores:
|
|
63
57
|
"""Final assessment metrics representing the predicted subjective evaluation."""
|
|
64
58
|
overall_perceptual_score: float
|
|
@@ -69,5 +63,5 @@ class PerceptualSeparationScores:
|
|
|
69
63
|
source_to_spatial_distortion_ratio: float
|
|
70
64
|
source_to_interference_ratio: float
|
|
71
65
|
source_to_artifacts_ratio: float
|
|
72
|
-
decomposition_waveforms: Optional[DecomposedWaveforms] = None
|
|
73
|
-
decomposition_files: Optional[DecomposedFilePaths] = None
|
|
66
|
+
decomposition_waveforms: DecomposedWaveforms | None = None
|
|
67
|
+
decomposition_files: DecomposedFilePaths | None = None
|
|
@@ -8,10 +8,6 @@ and zero-copy arrays.
|
|
|
8
8
|
|
|
9
9
|
import pathlib
|
|
10
10
|
from functools import lru_cache
|
|
11
|
-
from typing import List
|
|
12
|
-
from typing import Optional
|
|
13
|
-
from typing import Tuple
|
|
14
|
-
from typing import Union
|
|
15
11
|
|
|
16
12
|
import numpy as np
|
|
17
13
|
import scipy.linalg as linalg
|
|
@@ -230,7 +226,7 @@ def extract_target_spatial_distortion_interference_artifacts(
|
|
|
230
226
|
window_length: int,
|
|
231
227
|
hop_size: int,
|
|
232
228
|
use_two_stage_projection: bool = False
|
|
233
|
-
) ->
|
|
229
|
+
) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
|
|
234
230
|
r"""
|
|
235
231
|
Splits multi-source signal mixtures into physical sub-components.
|
|
236
232
|
"""
|
|
@@ -300,8 +296,8 @@ def extract_target_spatial_distortion_interference_artifacts(
|
|
|
300
296
|
def run_auditory_analysis_filterbank(
|
|
301
297
|
signal_waveform: np.ndarray,
|
|
302
298
|
sampling_frequency_hz: float,
|
|
303
|
-
modulation_matrix:
|
|
304
|
-
) ->
|
|
299
|
+
modulation_matrix: np.ndarray | None = None
|
|
300
|
+
) -> tuple[list[np.ndarray], GammatoneAnalyzer, np.ndarray]:
|
|
305
301
|
"""Helper executing Gammatone Analysis subband decomposition."""
|
|
306
302
|
minimum_frequency = 20.0
|
|
307
303
|
maximum_frequency = sampling_frequency_hz / 2.0
|
|
@@ -389,7 +385,7 @@ def get_synthesis_modulation_matrix(
|
|
|
389
385
|
def run_auditory_synthesis_filterbank(
|
|
390
386
|
subband_list: list,
|
|
391
387
|
analyzer: GammatoneAnalyzer
|
|
392
|
-
) ->
|
|
388
|
+
) -> tuple[np.ndarray, GammatoneSynthesizer]:
|
|
393
389
|
"""Helper executing Gammatone synthesis reconstruction."""
|
|
394
390
|
num_bands = len(subband_list)
|
|
395
391
|
sampling_frequency = analyzer.sampling_frequency_hz
|
|
@@ -446,10 +442,10 @@ def run_auditory_synthesis_filterbank(
|
|
|
446
442
|
|
|
447
443
|
|
|
448
444
|
def decompose_distortion_components(
|
|
449
|
-
source_files:
|
|
450
|
-
estimate_file:
|
|
451
|
-
configuration:
|
|
452
|
-
sampling_frequency_hz:
|
|
445
|
+
source_files: list[str | pathlib.Path | np.ndarray],
|
|
446
|
+
estimate_file: str | pathlib.Path | np.ndarray,
|
|
447
|
+
configuration: DecompositionConfiguration | None = None,
|
|
448
|
+
sampling_frequency_hz: float | None = None
|
|
453
449
|
) -> DecompositionResult:
|
|
454
450
|
r"""
|
|
455
451
|
Decomposes an estimated source signal into physical distortion components.
|
|
@@ -460,7 +456,7 @@ def decompose_distortion_components(
|
|
|
460
456
|
if not source_files:
|
|
461
457
|
raise ValueError("source_files list cannot be empty.")
|
|
462
458
|
|
|
463
|
-
is_file_mode = isinstance(estimate_file,
|
|
459
|
+
is_file_mode = isinstance(estimate_file, str | pathlib.Path)
|
|
464
460
|
|
|
465
461
|
if is_file_mode:
|
|
466
462
|
# File-based mode (handled by soundfile, which defaults to samples-first)
|
|
@@ -7,7 +7,6 @@ of frequency analysis, delay/phase alignment, and synthesis reconstruction.
|
|
|
7
7
|
|
|
8
8
|
import math
|
|
9
9
|
from functools import lru_cache
|
|
10
|
-
from typing import List
|
|
11
10
|
|
|
12
11
|
import numpy as np
|
|
13
12
|
import scipy.signal as signal
|
|
@@ -255,7 +254,7 @@ class GammatoneAnalyzer:
|
|
|
255
254
|
specified_center_frequency_hz,
|
|
256
255
|
upper_cutoff_frequency_hz
|
|
257
256
|
)
|
|
258
|
-
self.filters:
|
|
257
|
+
self.filters: list[GammatoneFilter] = [
|
|
259
258
|
GammatoneFilter(sampling_frequency_hz, freq, filter_order, bandwidth_factor)
|
|
260
259
|
for freq in self.center_frequencies
|
|
261
260
|
]
|
|
@@ -5,8 +5,6 @@ Computes perceptual features and linear/energy ratio calculations
|
|
|
5
5
|
using extreme N-dimensional vectorized broadcasting.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
-
from typing import Tuple
|
|
9
|
-
|
|
10
8
|
import numpy as np
|
|
11
9
|
|
|
12
10
|
from .auditory_model import generate_auditory_internal_representation
|
|
@@ -17,7 +15,7 @@ def calculate_bss_eval_energy_ratios(
|
|
|
17
15
|
target_distortion: np.ndarray,
|
|
18
16
|
interference: np.ndarray,
|
|
19
17
|
artifacts: np.ndarray
|
|
20
|
-
) ->
|
|
18
|
+
) -> tuple[float, float, float, float]:
|
|
21
19
|
r"""
|
|
22
20
|
Computes standard BSS Eval energy ratio metrics from physically decomposed components.
|
|
23
21
|
|
|
@@ -30,7 +28,7 @@ def calculate_bss_eval_energy_ratios(
|
|
|
30
28
|
:param artifacts: Non-linear processing artifacts.
|
|
31
29
|
:type artifacts: numpy.ndarray
|
|
32
30
|
:return: A tuple of (ISR, SIR, SAR, SDR) in Decibels (dB).
|
|
33
|
-
:rtype:
|
|
31
|
+
:rtype: tuple[float, float, float, float]
|
|
34
32
|
"""
|
|
35
33
|
flat_true_source = true_source.ravel()
|
|
36
34
|
flat_target_distortion = target_distortion.ravel()
|
|
@@ -168,9 +166,9 @@ def calculate_auditory_similarity_metric(
|
|
|
168
166
|
|
|
169
167
|
|
|
170
168
|
def calculate_auditory_quality_features(
|
|
171
|
-
decomposition_signals:
|
|
169
|
+
decomposition_signals: tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray],
|
|
172
170
|
sampling_frequency_hz: float = 16000.0
|
|
173
|
-
) ->
|
|
171
|
+
) -> tuple[float, float, float, float]:
|
|
174
172
|
"""Computes quality features by sending decomposed signals through the internal auditory model."""
|
|
175
173
|
true_target, target_distortion, interference, artifacts = decomposition_signals
|
|
176
174
|
|
|
@@ -8,9 +8,6 @@ Maps raw auditory similarity scores to Predicted Perceptual Scores
|
|
|
8
8
|
import os
|
|
9
9
|
import pathlib
|
|
10
10
|
from functools import lru_cache
|
|
11
|
-
from typing import List
|
|
12
|
-
from typing import Optional
|
|
13
|
-
from typing import Union
|
|
14
11
|
|
|
15
12
|
import numpy as np
|
|
16
13
|
import soundfile as sf
|
|
@@ -60,10 +57,10 @@ def evaluate_neural_network_mapping(
|
|
|
60
57
|
|
|
61
58
|
|
|
62
59
|
def predict_perceptual_evaluation_scores(
|
|
63
|
-
original_files:
|
|
64
|
-
estimate_file:
|
|
65
|
-
configuration:
|
|
66
|
-
sampling_frequency_hz:
|
|
60
|
+
original_files: list[str | pathlib.Path | np.ndarray],
|
|
61
|
+
estimate_file: str | pathlib.Path | np.ndarray,
|
|
62
|
+
configuration: DecompositionConfiguration | None = None,
|
|
63
|
+
sampling_frequency_hz: float | None = None,
|
|
67
64
|
return_decomposition: bool = False
|
|
68
65
|
) -> PerceptualSeparationScores:
|
|
69
66
|
r"""
|
|
@@ -82,7 +79,7 @@ def predict_perceptual_evaluation_scores(
|
|
|
82
79
|
waveforms = decomposition_result.waveforms
|
|
83
80
|
|
|
84
81
|
if sampling_frequency_hz is None:
|
|
85
|
-
if isinstance(estimate_file,
|
|
82
|
+
if isinstance(estimate_file, str | pathlib.Path):
|
|
86
83
|
_, sampling_frequency_hz = sf.read(estimate_file)
|
|
87
84
|
else:
|
|
88
85
|
sampling_frequency_hz = 16000.0
|
|
@@ -10,13 +10,12 @@ authors = [
|
|
|
10
10
|
]
|
|
11
11
|
readme = "README.md"
|
|
12
12
|
license = {file = "LICENSE"}
|
|
13
|
-
requires-python = ">=3.9"
|
|
13
|
+
requires-python = ">=3.10"
|
|
14
14
|
classifiers = [
|
|
15
15
|
"Development Status :: 4 - Beta",
|
|
16
16
|
"Intended Audience :: Science/Research",
|
|
17
17
|
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
|
|
18
18
|
"Programming Language :: Python :: 3",
|
|
19
|
-
"Programming Language :: Python :: 3.9",
|
|
20
19
|
"Programming Language :: Python :: 3.10",
|
|
21
20
|
"Programming Language :: Python :: 3.11",
|
|
22
21
|
"Programming Language :: Python :: 3.12",
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|