neverlib 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- neverlib/.claude/settings.local.json +9 -0
- neverlib/Docs/audio_aug/test_volume.ipynb +416 -0
- neverlib/Docs/audio_aug_test/test_volume.ipynb +289 -0
- neverlib/Docs/filter/biquad.ipynb +129 -0
- neverlib/Docs/filter/filter_family.ipynb +450 -0
- neverlib/Docs/filter/highpass.ipynb +139 -0
- neverlib/Docs/filter/scipy_filter_family.ipynb +110 -0
- neverlib/Docs/vad/VAD_Energy.ipynb +167 -0
- neverlib/Docs/vad/VAD_Silero.ipynb +325 -0
- neverlib/Docs/vad/VAD_WebRTC.ipynb +189 -0
- neverlib/Docs/vad/VAD_funasr.ipynb +192 -0
- neverlib/Docs/vad/VAD_rvADfast.ipynb +162 -0
- neverlib/Docs/vad/VAD_statistics.ipynb +532 -0
- neverlib/Docs/vad/VAD_tenVAD.ipynb +292 -0
- neverlib/Docs/vad/VAD_vadlib.ipynb +168 -0
- neverlib/Docs/vad/VAD_whisper.ipynb +404 -0
- neverlib/QA/gen_init.py +117 -0
- neverlib/QA/get_fun.py +19 -0
- neverlib/__init__.py +21 -4
- neverlib/audio_aug/HarmonicDistortion.py +19 -13
- neverlib/audio_aug/__init__.py +30 -12
- neverlib/audio_aug/audio_aug.py +19 -14
- neverlib/audio_aug/clip_aug.py +15 -18
- neverlib/audio_aug/coder_aug.py +44 -24
- neverlib/audio_aug/coder_aug2.py +54 -37
- neverlib/audio_aug/loss_packet_aug.py +7 -7
- neverlib/audio_aug/quant_aug.py +19 -17
- neverlib/data/000_short_enhance.wav +0 -0
- neverlib/data/3956_speech.wav +0 -0
- neverlib/data/3956_sweep.wav +0 -0
- neverlib/data/vad_example.wav +0 -0
- neverlib/data/white.wav +0 -0
- neverlib/data/white_EQ.wav +0 -0
- neverlib/data/white_matched.wav +0 -0
- neverlib/data_analyze/__init__.py +25 -20
- neverlib/data_analyze/dataset_analyzer.py +109 -114
- neverlib/data_analyze/quality_metrics.py +87 -89
- neverlib/data_analyze/rms_distrubution.py +23 -42
- neverlib/data_analyze/spectral_analysis.py +43 -46
- neverlib/data_analyze/statistics.py +76 -76
- neverlib/data_analyze/temporal_features.py +15 -6
- neverlib/data_analyze/visualization.py +208 -144
- neverlib/filter/__init__.py +17 -20
- neverlib/filter/auto_eq/__init__.py +18 -35
- neverlib/filter/auto_eq/de_eq.py +0 -2
- neverlib/filter/common.py +24 -5
- neverlib/metrics/DNSMOS/bak_ovr.onnx +0 -0
- neverlib/metrics/DNSMOS/model_v8.onnx +0 -0
- neverlib/metrics/DNSMOS/sig.onnx +0 -0
- neverlib/metrics/DNSMOS/sig_bak_ovr.onnx +0 -0
- neverlib/metrics/__init__.py +23 -0
- neverlib/metrics/dnsmos.py +4 -15
- neverlib/metrics/pDNSMOS/sig_bak_ovr.onnx +0 -0
- neverlib/metrics/pesq_c/PESQ +0 -0
- neverlib/metrics/pesq_c/dsp.c +553 -0
- neverlib/metrics/pesq_c/dsp.h +138 -0
- neverlib/metrics/pesq_c/pesq.h +294 -0
- neverlib/metrics/pesq_c/pesqdsp.c +1047 -0
- neverlib/metrics/pesq_c/pesqio.c +392 -0
- neverlib/metrics/pesq_c/pesqmain.c +610 -0
- neverlib/metrics/pesq_c/pesqmod.c +1417 -0
- neverlib/metrics/pesq_c/pesqpar.h +297 -0
- neverlib/metrics/snr.py +5 -1
- neverlib/metrics/spec.py +31 -21
- neverlib/metrics/test_pesq.py +0 -4
- neverlib/tests/test_imports.py +17 -0
- neverlib/utils/__init__.py +26 -15
- neverlib/utils/audio_split.py +5 -1
- neverlib/utils/checkGPU.py +17 -9
- neverlib/utils/lazy_expose.py +29 -0
- neverlib/utils/utils.py +40 -12
- neverlib/vad/__init__.py +33 -25
- neverlib/vad/class_get_speech.py +1 -1
- neverlib/vad/class_vad.py +3 -3
- neverlib/vad/img.png +0 -0
- {neverlib-0.2.5.dist-info → neverlib-0.2.7.dist-info}/METADATA +20 -17
- {neverlib-0.2.5.dist-info → neverlib-0.2.7.dist-info}/RECORD +80 -37
- {neverlib-0.2.5.dist-info → neverlib-0.2.7.dist-info}/WHEEL +0 -0
- {neverlib-0.2.5.dist-info → neverlib-0.2.7.dist-info}/licenses/LICENSE +0 -0
- {neverlib-0.2.5.dist-info → neverlib-0.2.7.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
/*****************************************************************************
|
|
2
|
+
|
|
3
|
+
Perceptual Evaluation of Speech Quality (PESQ)
|
|
4
|
+
ITU-T Recommendation P.862.
|
|
5
|
+
Version 1.2 - 2 August 2002.
|
|
6
|
+
|
|
7
|
+
****************************************
|
|
8
|
+
PESQ Intellectual Property Rights Notice
|
|
9
|
+
****************************************
|
|
10
|
+
|
|
11
|
+
DEFINITIONS:
|
|
12
|
+
------------
|
|
13
|
+
For the purposes of this Intellectual Property Rights Notice
|
|
14
|
+
the terms "Perceptual Evaluation of Speech Quality Algorithm"
|
|
15
|
+
and "PESQ Algorithm" refer to the objective speech quality
|
|
16
|
+
measurement algorithm defined in ITU-T Recommendation P.862;
|
|
17
|
+
the term "PESQ Software" refers to the C-code component of P.862.
|
|
18
|
+
|
|
19
|
+
NOTICE:
|
|
20
|
+
-------
|
|
21
|
+
All copyright, trade marks, trade names, patents, know-how and
|
|
22
|
+
all or any other intellectual rights subsisting in or used in
|
|
23
|
+
connection with including all algorithms, documents and manuals
|
|
24
|
+
relating to the PESQ Algorithm and or PESQ Software are and remain
|
|
25
|
+
the sole property in law, ownership, regulations, treaties and
|
|
26
|
+
patent rights of the Owners identified below. The user may not
|
|
27
|
+
dispute or question the ownership of the PESQ Algorithm and
|
|
28
|
+
or PESQ Software.
|
|
29
|
+
|
|
30
|
+
OWNERS ARE:
|
|
31
|
+
-----------
|
|
32
|
+
|
|
33
|
+
1. British Telecommunications plc (BT), all rights assigned
|
|
34
|
+
to Psytechnics Limited
|
|
35
|
+
2. Royal KPN NV, all rights assigned to OPTICOM GmbH
|
|
36
|
+
|
|
37
|
+
RESTRICTIONS:
|
|
38
|
+
-------------
|
|
39
|
+
|
|
40
|
+
The user cannot:
|
|
41
|
+
|
|
42
|
+
1. alter, duplicate, modify, adapt, or translate in whole or in
|
|
43
|
+
part any aspect of the PESQ Algorithm and or PESQ Software
|
|
44
|
+
2. sell, hire, loan, distribute, dispose or put to any commercial
|
|
45
|
+
use other than those permitted below in whole or in part any
|
|
46
|
+
aspect of the PESQ Algorithm and or PESQ Software
|
|
47
|
+
|
|
48
|
+
PERMITTED USE:
|
|
49
|
+
--------------
|
|
50
|
+
|
|
51
|
+
The user may:
|
|
52
|
+
|
|
53
|
+
1. Use the PESQ Software to:
|
|
54
|
+
i) understand the PESQ Algorithm; or
|
|
55
|
+
ii) evaluate the ability of the PESQ Algorithm to perform
|
|
56
|
+
its intended function of predicting the speech quality
|
|
57
|
+
of a system; or
|
|
58
|
+
iii) evaluate the computational complexity of the PESQ Algorithm,
|
|
59
|
+
with the limitation that none of said evaluations or its
|
|
60
|
+
results shall be used for external commercial use.
|
|
61
|
+
|
|
62
|
+
2. Use the PESQ Software to test if an implementation of the PESQ
|
|
63
|
+
Algorithm conforms to ITU-T Recommendation P.862.
|
|
64
|
+
|
|
65
|
+
3. With the prior written permission of both Psytechnics Limited
|
|
66
|
+
and OPTICOM GmbH, use the PESQ Software in accordance with the
|
|
67
|
+
above Restrictions to perform work that meets all of the following
|
|
68
|
+
criteria:
|
|
69
|
+
i) the work must contribute directly to the maintenance of an
|
|
70
|
+
existing ITU recommendation or the development of a new ITU
|
|
71
|
+
recommendation under an approved ITU Study Item; and
|
|
72
|
+
ii) the work and its results must be fully described in a
|
|
73
|
+
written contribution to the ITU that is presented at a formal
|
|
74
|
+
ITU meeting within one year of the start of the work; and
|
|
75
|
+
iii) neither the work nor its results shall be put to any
|
|
76
|
+
commercial use other than making said contribution to the ITU.
|
|
77
|
+
Said permission will be provided on a case-by-case basis.
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
ANY OTHER USE OR APPLICATION OF THE PESQ SOFTWARE AND/OR THE PESQ
|
|
81
|
+
ALGORITHM WILL REQUIRE A PESQ LICENCE AGREEMENT, WHICH MAY BE OBTAINED
|
|
82
|
+
FROM EITHER OPTICOM GMBH OR PSYTECHNICS LIMITED.
|
|
83
|
+
|
|
84
|
+
EACH COMPANY OFFERS OEM LICENSE AGREEMENTS, WHICH COMBINE OEM
|
|
85
|
+
IMPLEMENTATIONS OF THE PESQ ALGORITHM TOGETHER WITH A PESQ PATENT LICENSE
|
|
86
|
+
AGREEMENT. PESQ PATENT-ONLY LICENSE AGREEMENTS MAY BE OBTAINED FROM OPTICOM.
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
***********************************************************************
|
|
90
|
+
* OPTICOM GmbH * Psytechnics Limited *
|
|
91
|
+
* Am Weichselgarten 7, * Fraser House, 23 Museum Street, *
|
|
92
|
+
* D- 91058 Erlangen, Germany * Ipswich IP1 1HN, England *
|
|
93
|
+
* Phone: +49 (0) 9131 691 160 * Phone: +44 (0) 1473 261 800 *
|
|
94
|
+
* Fax: +49 (0) 9131 691 325 * Fax: +44 (0) 1473 261 880 *
|
|
95
|
+
* E-mail: info@opticom.de, * E-mail: info@psytechnics.com, *
|
|
96
|
+
* www.opticom.de * www.psytechnics.com *
|
|
97
|
+
***********************************************************************
|
|
98
|
+
|
|
99
|
+
Further information is also available from www.pesq.org
|
|
100
|
+
|
|
101
|
+
*****************************************************************************/
|
|
102
|
+
|
|
103
|
+
long Fs = 0L;
|
|
104
|
+
|
|
105
|
+
long Fs_16k = 16000L;
|
|
106
|
+
|
|
107
|
+
long Fs_8k = 8000L;
|
|
108
|
+
|
|
109
|
+
long Downsample;
|
|
110
|
+
|
|
111
|
+
long Downsample_16k = 64;
|
|
112
|
+
|
|
113
|
+
long Downsample_8k = 32;
|
|
114
|
+
|
|
115
|
+
long Align_Nfft;
|
|
116
|
+
|
|
117
|
+
long Align_Nfft_16k = 1024;
|
|
118
|
+
|
|
119
|
+
long Align_Nfft_8k = 512;
|
|
120
|
+
|
|
121
|
+
long InIIR_Nsos;
|
|
122
|
+
|
|
123
|
+
long InIIR_Nsos_8k = 8L;
|
|
124
|
+
float InIIR_Hsos_8k[LINIIR] =
|
|
125
|
+
{ 0.885535424f, -0.885535424f, 0.000000000f, -0.771070709f, 0.000000000f,
|
|
126
|
+
0.895092588f, 1.292907193f, 0.449260174f, 1.268869037f, 0.442025372f,
|
|
127
|
+
4.049527940f, -7.865190042f, 3.815662102f, -1.746859852f, 0.786305963f,
|
|
128
|
+
0.500002353f, -0.500002353f, 0.000000000f, 0.000000000f, 0.000000000f,
|
|
129
|
+
0.565002834f, -0.241585934f, -0.306009671f, 0.259688659f, 0.249979657f,
|
|
130
|
+
2.115237288f, 0.919935084f, 1.141240051f, -1.587313419f, 0.665935315f,
|
|
131
|
+
0.912224584f, -0.224397719f, -0.641121413f, -0.246029464f, -0.556720590f,
|
|
132
|
+
0.444617727f, -0.307589321f, 0.141638062f, -0.996391149f, 0.502251622f };
|
|
133
|
+
|
|
134
|
+
long InIIR_Nsos_16k = 12L;
|
|
135
|
+
float InIIR_Hsos_16k[LINIIR] =
|
|
136
|
+
{ 0.325631521f, -0.086782860f, -0.238848661f, -1.079416490f, 0.434583902f,
|
|
137
|
+
0.403961804f, -0.556985881f, 0.153024077f, -0.415115835f, 0.696590244f,
|
|
138
|
+
4.736162769f, 3.287251046f, 1.753289019f, -1.859599046f, 0.876284034f,
|
|
139
|
+
0.365373469f, 0.000000000f, 0.000000000f, -0.634626531f, 0.000000000f,
|
|
140
|
+
0.884811506f, 0.000000000f, 0.000000000f, -0.256725271f, 0.141536777f,
|
|
141
|
+
0.723593055f, -1.447186099f, 0.723593044f, -1.129587469f, 0.657232737f,
|
|
142
|
+
1.644910855f, -1.817280902f, 1.249658063f, -1.778403899f, 0.801724355f,
|
|
143
|
+
0.633692689f, -0.284644314f, -0.319789663f, 0.000000000f, 0.000000000f,
|
|
144
|
+
1.032763031f, 0.268428979f, 0.602913323f, 0.000000000f, 0.000000000f,
|
|
145
|
+
1.001616361f, -0.823749013f, 0.439731942f, -0.885778255f, 0.000000000f,
|
|
146
|
+
0.752472096f, -0.375388990f, 0.188977609f, -0.077258216f, 0.247230734f,
|
|
147
|
+
1.023700575f, 0.001661628f, 0.521284240f, -0.183867259f, 0.354324187f };
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
int nr_of_hz_bands_per_bark_band_8k [42] = { 1, 1, 1, 1, 1,
|
|
151
|
+
1, 1, 1, 2, 1,
|
|
152
|
+
1, 1, 1, 1, 2,
|
|
153
|
+
1, 1, 2, 2, 2,
|
|
154
|
+
2, 2, 2, 2, 2,
|
|
155
|
+
3, 3, 3, 3, 4,
|
|
156
|
+
3, 4, 5, 4, 5,
|
|
157
|
+
6, 6, 7, 8, 9,
|
|
158
|
+
9, 11, };
|
|
159
|
+
|
|
160
|
+
double centre_of_band_bark_8k [42] = { 0.078672, 0.316341, 0.636559, 0.961246, 1.290450,
|
|
161
|
+
1.624217, 1.962597, 2.305636, 2.653383, 3.005889,
|
|
162
|
+
3.363201, 3.725371, 4.092449, 4.464486, 4.841533,
|
|
163
|
+
5.223642, 5.610866, 6.003256, 6.400869, 6.803755,
|
|
164
|
+
7.211971, 7.625571, 8.044611, 8.469146, 8.899232,
|
|
165
|
+
9.334927, 9.776288, 10.223374, 10.676242, 11.134952,
|
|
166
|
+
11.599563, 12.070135, 12.546731, 13.029408, 13.518232,
|
|
167
|
+
14.013264, 14.514566, 15.022202, 15.536238, 16.056736,
|
|
168
|
+
16.583761, 17.117382};
|
|
169
|
+
|
|
170
|
+
double centre_of_band_hz_8k [42] = { 7.867213, 31.634144, 63.655895, 96.124611, 129.044968,
|
|
171
|
+
162.421738, 196.259659, 230.563568, 265.338348, 300.588867,
|
|
172
|
+
336.320129, 372.537140, 409.244934, 446.448578, 484.568604,
|
|
173
|
+
526.600586, 570.303833, 619.423340, 672.121643, 728.525696,
|
|
174
|
+
785.675964, 846.835693, 909.691650, 977.063293, 1049.861694,
|
|
175
|
+
1129.635986, 1217.257568, 1312.109497, 1412.501465, 1517.999390,
|
|
176
|
+
1628.894165, 1746.194336, 1871.568848, 2008.776123, 2158.979248,
|
|
177
|
+
2326.743164, 2513.787109, 2722.488770, 2952.586670, 3205.835449,
|
|
178
|
+
3492.679932, 3820.219238};
|
|
179
|
+
|
|
180
|
+
double width_of_band_bark_8k [42] = { 0.157344, 0.317994, 0.322441, 0.326934, 0.331474,
|
|
181
|
+
0.336061, 0.340697, 0.345381, 0.350114, 0.354897,
|
|
182
|
+
0.359729, 0.364611, 0.369544, 0.374529, 0.379565,
|
|
183
|
+
0.384653, 0.389794, 0.394989, 0.400236, 0.405538,
|
|
184
|
+
0.410894, 0.416306, 0.421773, 0.427297, 0.432877,
|
|
185
|
+
0.438514, 0.444209, 0.449962, 0.455774, 0.461645,
|
|
186
|
+
0.467577, 0.473569, 0.479621, 0.485736, 0.491912,
|
|
187
|
+
0.498151, 0.504454, 0.510819, 0.517250, 0.523745,
|
|
188
|
+
0.530308, 0.536934};
|
|
189
|
+
|
|
190
|
+
double width_of_band_hz_8k [42] = { 15.734426, 31.799433, 32.244064, 32.693359, 33.147385,
|
|
191
|
+
33.606140, 34.069702, 34.538116, 35.011429, 35.489655,
|
|
192
|
+
35.972870, 36.461121, 36.954407, 37.452911, 40.269653,
|
|
193
|
+
42.311859, 45.992554, 51.348511, 55.040527, 56.775208,
|
|
194
|
+
58.699402, 62.445862, 64.820923, 69.195374, 76.745667,
|
|
195
|
+
84.016235, 90.825684, 97.931152, 103.348877, 107.801880,
|
|
196
|
+
113.552246, 121.490601, 130.420410, 143.431763, 158.486816,
|
|
197
|
+
176.872803, 198.314697, 219.549561, 240.600098, 268.702393,
|
|
198
|
+
306.060059, 349.937012};
|
|
199
|
+
|
|
200
|
+
double pow_dens_correction_factor_8k [42] = { 100.000000, 99.999992, 100.000000, 100.000008, 100.000008,
|
|
201
|
+
100.000015, 99.999992, 99.999969, 50.000027, 100.000000,
|
|
202
|
+
99.999969, 100.000015, 99.999947, 100.000061, 53.047077,
|
|
203
|
+
110.000046, 117.991989, 65.000000, 68.760147, 69.999931,
|
|
204
|
+
71.428818, 75.000038, 76.843384, 80.968781, 88.646126,
|
|
205
|
+
63.864388, 68.155350, 72.547775, 75.584831, 58.379192,
|
|
206
|
+
80.950836, 64.135651, 54.384785, 73.821884, 64.437073,
|
|
207
|
+
59.176456, 65.521278, 61.399822, 58.144047, 57.004543,
|
|
208
|
+
64.126297, 59.248363};
|
|
209
|
+
|
|
210
|
+
double abs_thresh_power_8k [42] = {51286152.000000, 2454709.500000, 70794.593750, 4897.788574, 1174.897705,
|
|
211
|
+
389.045166, 104.712860, 45.708820, 17.782795, 9.772372,
|
|
212
|
+
4.897789, 3.090296, 1.905461, 1.258925, 0.977237,
|
|
213
|
+
0.724436, 0.562341, 0.457088, 0.389045, 0.331131,
|
|
214
|
+
0.295121, 0.269153, 0.257040, 0.251189, 0.251189,
|
|
215
|
+
0.251189, 0.251189, 0.263027, 0.288403, 0.309030,
|
|
216
|
+
0.338844, 0.371535, 0.398107, 0.436516, 0.467735,
|
|
217
|
+
0.489779, 0.501187, 0.501187, 0.512861, 0.524807,
|
|
218
|
+
0.524807, 0.524807};
|
|
219
|
+
|
|
220
|
+
int nr_of_hz_bands_per_bark_band_16k [49] = { 1, 1, 1, 1, 1,
|
|
221
|
+
1, 1, 1, 2, 1,
|
|
222
|
+
1, 1, 1, 1, 2,
|
|
223
|
+
1, 1, 2, 2, 2,
|
|
224
|
+
2, 2, 2, 2, 2,
|
|
225
|
+
3, 3, 3, 3, 4,
|
|
226
|
+
3, 4, 5, 4, 5,
|
|
227
|
+
6, 6, 7, 8, 9,
|
|
228
|
+
9, 12, 12, 15, 16,
|
|
229
|
+
18, 21, 25, 20};
|
|
230
|
+
|
|
231
|
+
double centre_of_band_bark_16k [49] = { 0.078672, 0.316341, 0.636559, 0.961246, 1.290450,
|
|
232
|
+
1.624217, 1.962597, 2.305636, 2.653383, 3.005889,
|
|
233
|
+
3.363201, 3.725371, 4.092449, 4.464486, 4.841533,
|
|
234
|
+
5.223642, 5.610866, 6.003256, 6.400869, 6.803755,
|
|
235
|
+
7.211971, 7.625571, 8.044611, 8.469146, 8.899232,
|
|
236
|
+
9.334927, 9.776288, 10.223374, 10.676242, 11.134952,
|
|
237
|
+
11.599563, 12.070135, 12.546731, 13.029408, 13.518232,
|
|
238
|
+
14.013264, 14.514566, 15.022202, 15.536238, 16.056736,
|
|
239
|
+
16.583761, 17.117382, 17.657663, 18.204674, 18.758478,
|
|
240
|
+
19.319147, 19.886751, 20.461355, 21.043034};
|
|
241
|
+
|
|
242
|
+
double centre_of_band_hz_16k [49] = { 7.867213, 31.634144, 63.655895, 96.124611, 129.044968,
|
|
243
|
+
162.421738, 196.259659, 230.563568, 265.338348, 300.588867,
|
|
244
|
+
336.320129, 372.537140, 409.244934, 446.448578, 484.568604,
|
|
245
|
+
526.600586, 570.303833, 619.423340, 672.121643, 728.525696,
|
|
246
|
+
785.675964, 846.835693, 909.691650, 977.063293, 1049.861694,
|
|
247
|
+
1129.635986, 1217.257568, 1312.109497, 1412.501465, 1517.999390,
|
|
248
|
+
1628.894165, 1746.194336, 1871.568848, 2008.776123, 2158.979248,
|
|
249
|
+
2326.743164, 2513.787109, 2722.488770, 2952.586670, 3205.835449,
|
|
250
|
+
3492.679932, 3820.219238, 4193.938477, 4619.846191, 5100.437012,
|
|
251
|
+
5636.199219, 6234.313477, 6946.734863, 7796.473633};
|
|
252
|
+
|
|
253
|
+
double width_of_band_bark_16k [49] = { 0.157344, 0.317994, 0.322441, 0.326934, 0.331474,
|
|
254
|
+
0.336061, 0.340697, 0.345381, 0.350114, 0.354897,
|
|
255
|
+
0.359729, 0.364611, 0.369544, 0.374529, 0.379565,
|
|
256
|
+
0.384653, 0.389794, 0.394989, 0.400236, 0.405538,
|
|
257
|
+
0.410894, 0.416306, 0.421773, 0.427297, 0.432877,
|
|
258
|
+
0.438514, 0.444209, 0.449962, 0.455774, 0.461645,
|
|
259
|
+
0.467577, 0.473569, 0.479621, 0.485736, 0.491912,
|
|
260
|
+
0.498151, 0.504454, 0.510819, 0.517250, 0.523745,
|
|
261
|
+
0.530308, 0.536934, 0.543629, 0.550390, 0.557220,
|
|
262
|
+
0.564119, 0.571085, 0.578125, 0.585232};
|
|
263
|
+
|
|
264
|
+
double width_of_band_hz_16k [49] = { 15.734426, 31.799433, 32.244064, 32.693359, 33.147385,
|
|
265
|
+
33.606140, 34.069702, 34.538116, 35.011429, 35.489655,
|
|
266
|
+
35.972870, 36.461121, 36.954407, 37.452911, 40.269653,
|
|
267
|
+
42.311859, 45.992554, 51.348511, 55.040527, 56.775208,
|
|
268
|
+
58.699402, 62.445862, 64.820923, 69.195374, 76.745667,
|
|
269
|
+
84.016235, 90.825684, 97.931152, 103.348877, 107.801880,
|
|
270
|
+
113.552246, 121.490601, 130.420410, 143.431763, 158.486816,
|
|
271
|
+
176.872803, 198.314697, 219.549561, 240.600098, 268.702393,
|
|
272
|
+
306.060059, 349.937012, 398.686279, 454.713867, 506.841797,
|
|
273
|
+
564.863770, 637.261230, 794.717285, 931.068359};
|
|
274
|
+
|
|
275
|
+
double pow_dens_correction_factor_16k [49] = { 100.000000, 99.999992, 100.000000, 100.000008, 100.000008,
|
|
276
|
+
100.000015, 99.999992, 99.999969, 50.000027, 100.000000,
|
|
277
|
+
99.999969, 100.000015, 99.999947, 100.000061, 53.047077,
|
|
278
|
+
110.000046, 117.991989, 65.000000, 68.760147, 69.999931,
|
|
279
|
+
71.428818, 75.000038, 76.843384, 80.968781, 88.646126,
|
|
280
|
+
63.864388, 68.155350, 72.547775, 75.584831, 58.379192,
|
|
281
|
+
80.950836, 64.135651, 54.384785, 73.821884, 64.437073,
|
|
282
|
+
59.176456, 65.521278, 61.399822, 58.144047, 57.004543,
|
|
283
|
+
64.126297, 54.311001, 61.114979, 55.077751, 56.849335,
|
|
284
|
+
55.628868, 53.137054, 54.985844, 79.546974};
|
|
285
|
+
double abs_thresh_power_16k [49] = {51286152.000000, 2454709.500000, 70794.593750, 4897.788574, 1174.897705,
|
|
286
|
+
389.045166, 104.712860, 45.708820, 17.782795, 9.772372,
|
|
287
|
+
4.897789, 3.090296, 1.905461, 1.258925, 0.977237,
|
|
288
|
+
0.724436, 0.562341, 0.457088, 0.389045, 0.331131,
|
|
289
|
+
0.295121, 0.269153, 0.257040, 0.251189, 0.251189,
|
|
290
|
+
0.251189, 0.251189, 0.263027, 0.288403, 0.309030,
|
|
291
|
+
0.338844, 0.371535, 0.398107, 0.436516, 0.467735,
|
|
292
|
+
0.489779, 0.501187, 0.501187, 0.512861, 0.524807,
|
|
293
|
+
0.524807, 0.524807, 0.512861, 0.478630, 0.426580,
|
|
294
|
+
0.371535, 0.363078, 0.416869, 0.537032};
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
/* END OF FILE */
|
neverlib/metrics/snr.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import librosa
|
|
2
1
|
import numpy as np
|
|
3
2
|
from neverlib.vad.utils import vad2nad
|
|
4
3
|
from neverlib.filter import HPFilter
|
|
@@ -72,6 +71,11 @@ def seg_snr(clean, noisy, frame_length: int, hop_length: int):
|
|
|
72
71
|
Raises:
|
|
73
72
|
ValueError: 当输入参数不合法时抛出
|
|
74
73
|
"""
|
|
74
|
+
try:
|
|
75
|
+
import librosa
|
|
76
|
+
except Exception as e:
|
|
77
|
+
raise ImportError("需要安装 librosa 才能使用 seg_snr: pip install librosa") from e
|
|
78
|
+
|
|
75
79
|
assert clean.shape == noisy.shape, "clean和noisy的维度不一样"
|
|
76
80
|
|
|
77
81
|
# 分帧
|
neverlib/metrics/spec.py
CHANGED
|
@@ -36,20 +36,24 @@ def sd(ref_wav, test_wav, n_fft=2048, hop_length=512, win_length=None):
|
|
|
36
36
|
float: 频谱距离值,值越小表示两个信号越相似
|
|
37
37
|
"""
|
|
38
38
|
assert len(ref_wav) == len(test_wav), "输入信号长度必须相同"
|
|
39
|
-
|
|
39
|
+
|
|
40
40
|
# 计算短时傅里叶变换
|
|
41
|
-
ref_spec = librosa.stft(ref_wav,
|
|
42
|
-
|
|
43
|
-
|
|
41
|
+
ref_spec = librosa.stft(ref_wav,
|
|
42
|
+
n_fft=n_fft,
|
|
43
|
+
hop_length=hop_length,
|
|
44
|
+
win_length=win_length)
|
|
45
|
+
test_spec = librosa.stft(test_wav,
|
|
46
|
+
n_fft=n_fft,
|
|
47
|
+
hop_length=hop_length,
|
|
48
|
+
win_length=win_length)
|
|
49
|
+
|
|
44
50
|
# 计算频谱距离:均方根误差
|
|
45
51
|
spec_diff = ref_spec - test_spec
|
|
46
|
-
squared_diff = np.abs(spec_diff)
|
|
52
|
+
squared_diff = np.abs(spec_diff)**2
|
|
47
53
|
mean_squared_diff = np.mean(squared_diff)
|
|
48
54
|
sd_value = np.sqrt(mean_squared_diff)
|
|
49
|
-
|
|
50
|
-
return sd_value
|
|
51
|
-
|
|
52
55
|
|
|
56
|
+
return sd_value
|
|
53
57
|
|
|
54
58
|
|
|
55
59
|
def lsd(ref_wav, test_wav, n_fft=2048, hop_length=512, win_length=None):
|
|
@@ -69,20 +73,26 @@ def lsd(ref_wav, test_wav, n_fft=2048, hop_length=512, win_length=None):
|
|
|
69
73
|
float: 对数谱距离值, 单位为分贝 (dB)。
|
|
70
74
|
"""
|
|
71
75
|
assert ref_wav.ndim == 1 and test_wav.ndim == 1, "输入信号必须是一维数组。"
|
|
72
|
-
|
|
76
|
+
|
|
73
77
|
if win_length is None:
|
|
74
78
|
win_length = n_fft
|
|
75
79
|
|
|
76
|
-
ref_stft = librosa.stft(ref_wav,
|
|
77
|
-
|
|
80
|
+
ref_stft = librosa.stft(ref_wav,
|
|
81
|
+
n_fft=n_fft,
|
|
82
|
+
hop_length=hop_length,
|
|
83
|
+
win_length=win_length) # (F,T)
|
|
84
|
+
test_stft = librosa.stft(test_wav,
|
|
85
|
+
n_fft=n_fft,
|
|
86
|
+
hop_length=hop_length,
|
|
87
|
+
win_length=win_length) # (F,T)
|
|
78
88
|
|
|
79
|
-
ref_power_spec = np.abs(ref_stft)
|
|
80
|
-
test_power_spec = np.abs(test_stft)
|
|
89
|
+
ref_power_spec = np.abs(ref_stft)**2 # (F,T)
|
|
90
|
+
test_power_spec = np.abs(test_stft)**2 # (F,T)
|
|
81
91
|
|
|
82
92
|
ref_log_power_spec = 10 * np.log10(ref_power_spec + EPS)
|
|
83
93
|
test_log_power_spec = 10 * np.log10(test_power_spec + EPS)
|
|
84
94
|
|
|
85
|
-
squared_error = (ref_log_power_spec - test_log_power_spec)
|
|
95
|
+
squared_error = (ref_log_power_spec - test_log_power_spec)**2
|
|
86
96
|
lsd_val = np.sqrt(np.mean(squared_error))
|
|
87
97
|
|
|
88
98
|
return lsd_val
|
|
@@ -108,18 +118,19 @@ def mcd(ref_wav, test_wav, sr=16000, n_mfcc=13):
|
|
|
108
118
|
# 计算MFCC特征
|
|
109
119
|
ref_mfcc = librosa.feature.mfcc(y=ref_wav, sr=sr, n_mfcc=n_mfcc)
|
|
110
120
|
test_mfcc = librosa.feature.mfcc(y=test_wav, sr=sr, n_mfcc=n_mfcc)
|
|
111
|
-
|
|
121
|
+
|
|
112
122
|
# 计算MCD (跳过0阶系数,因为0阶主要表示能量)
|
|
113
123
|
diff = ref_mfcc[1:] - test_mfcc[1:]
|
|
114
|
-
mcd_value = (10.0 / np.log(10)) * np.sqrt(
|
|
115
|
-
|
|
124
|
+
mcd_value = (10.0 / np.log(10)) * np.sqrt(
|
|
125
|
+
2 * np.mean(np.sum(diff**2, axis=0)))
|
|
126
|
+
|
|
116
127
|
return mcd_value
|
|
117
128
|
|
|
118
129
|
|
|
119
130
|
if __name__ == "__main__":
|
|
120
|
-
ref_file = "../data/vad_example.wav"
|
|
121
|
-
test_file = "../data/vad_example.wav"
|
|
122
|
-
|
|
131
|
+
ref_file = "../data/vad_example.wav" # 参考语音文件路径
|
|
132
|
+
test_file = "../data/vad_example.wav" # 测试语音文件路径
|
|
133
|
+
|
|
123
134
|
ref_wav, ref_sr = sf.read(ref_file)
|
|
124
135
|
test_wav, test_sr = sf.read(test_file)
|
|
125
136
|
assert ref_sr == test_sr == 16000, "采样率必须为16000Hz"
|
|
@@ -133,4 +144,3 @@ if __name__ == "__main__":
|
|
|
133
144
|
|
|
134
145
|
sd_value = sd(ref_wav, test_wav)
|
|
135
146
|
print(f"频谱距离: {sd_value:.2f}")
|
|
136
|
-
|
neverlib/metrics/test_pesq.py
CHANGED
|
@@ -8,7 +8,6 @@ PESQ 包含 3 种类型的值:NB PESQ MOS、NB MOS LQO、WB MOS LQO。此包
|
|
|
8
8
|
import pesq
|
|
9
9
|
import pypesq
|
|
10
10
|
import librosa
|
|
11
|
-
import os
|
|
12
11
|
import numpy as np
|
|
13
12
|
|
|
14
13
|
fs = 16000
|
|
@@ -30,6 +29,3 @@ def mos2pesq(mos):
|
|
|
30
29
|
""" 将MOS-LQO得分[1, 4.5]映射到PESQ值[-0.5, 4.5]上,映射函数来源于:P.862.1"""
|
|
31
30
|
inlog = (4.999 - mos) / (mos - 0.999)
|
|
32
31
|
return (4.6607 - np.log(inlog)) / 1.4945
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
# print(mos2pesq(3.518))
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
'''
|
|
4
|
+
Author: 凌逆战 | Never
|
|
5
|
+
Date: 2025-09-07
|
|
6
|
+
Description: 测试neverlib导入功能
|
|
7
|
+
'''
|
|
8
|
+
import sys
|
|
9
|
+
import os
|
|
10
|
+
import time
|
|
11
|
+
|
|
12
|
+
# 确保当前目录在Python路径中,以便导入neverlib
|
|
13
|
+
# sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
14
|
+
sys.path.append("../..")
|
|
15
|
+
print("开始测试neverlib导入功能...")
|
|
16
|
+
|
|
17
|
+
from neverlib.audio_aug import limiter
|
neverlib/utils/__init__.py
CHANGED
|
@@ -1,15 +1,26 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
"
|
|
9
|
-
"
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
"
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
1
|
+
# This file is auto-generated. Do NOT edit manually.
|
|
2
|
+
# Generated by neverlib.QA.gen_init
|
|
3
|
+
from lazy_loader import attach
|
|
4
|
+
|
|
5
|
+
__getattr__, __dir__, __all__ = attach(
|
|
6
|
+
__name__,
|
|
7
|
+
submodules=[
|
|
8
|
+
"audio_split",
|
|
9
|
+
"checkGPU",
|
|
10
|
+
"lazy_expose",
|
|
11
|
+
"lazy_module",
|
|
12
|
+
"message",
|
|
13
|
+
"utils",
|
|
14
|
+
],
|
|
15
|
+
submod_attrs={
|
|
16
|
+
"audio_split": ['audio_split_VADfunasr', 'audio_split_VADsilero', 'audio_split_ffmpeg',
|
|
17
|
+
'audio_split_np', 'audio_split_pydub', 'audio_split_random', 'audio_split_sox'],
|
|
18
|
+
"checkGPU": ['get_gpu_utilization', 'is_gpu_idle', 'monitor_gpu_utilization'],
|
|
19
|
+
"lazy_expose": ['attach_and_expose_all'],
|
|
20
|
+
"lazy_module": ['LazyModule'],
|
|
21
|
+
"message": ['send_QQEmail', 'send_QQEmail_with_images'],
|
|
22
|
+
"utils": ['EPS', 'DatasetSubfloderSplit', 'TrainValSplit', 'TrainValTestSplit', 'del_empty_folders',
|
|
23
|
+
'get_audio_segments', 'get_file_time', 'get_leaf_folders', 'get_path_list', 'pcm2wav',
|
|
24
|
+
'rename_files_and_folders', 'save_weight_histogram', 'wav2pcm'],
|
|
25
|
+
}
|
|
26
|
+
)
|
neverlib/utils/audio_split.py
CHANGED
|
@@ -10,7 +10,6 @@ from tqdm import tqdm
|
|
|
10
10
|
import soundfile as sf
|
|
11
11
|
import numpy as np
|
|
12
12
|
from .utils import get_path_list
|
|
13
|
-
from pydub import AudioSegment
|
|
14
13
|
|
|
15
14
|
|
|
16
15
|
def audio_split_ffmpeg(source_path, target_path, sr, channel_num, duration, endwith="*.pcm"):
|
|
@@ -130,6 +129,11 @@ def audio_split_pydub(source_path, target_path, sr, channel_num, duration, endwi
|
|
|
130
129
|
:param endwith: 音频格式(支持pcm和wav)
|
|
131
130
|
:param sample_width: 音频的样本宽度(字节数), 默认为2, 表示16位音频
|
|
132
131
|
"""
|
|
132
|
+
try:
|
|
133
|
+
from pydub import AudioSegment
|
|
134
|
+
except Exception as e:
|
|
135
|
+
raise ImportError("需要安装 pydub 才能使用 audio_split_pydub: pip install pydub") from e
|
|
136
|
+
|
|
133
137
|
assert duration % 1 == 0, "duration必须是1s的整数倍"
|
|
134
138
|
wav_path_list = get_path_list(source_path, end=endwith) # 获取音频文件列表
|
|
135
139
|
print("待分割的音频数: ", len(wav_path_list))
|
neverlib/utils/checkGPU.py
CHANGED
|
@@ -6,14 +6,20 @@
|
|
|
6
6
|
nohup python -u ./checkGPU.py > ./checkGPU.log 2>&1 &
|
|
7
7
|
pid 5993
|
|
8
8
|
"""
|
|
9
|
-
import subprocess
|
|
10
9
|
import time
|
|
11
|
-
import
|
|
10
|
+
import subprocess
|
|
12
11
|
import numpy as np
|
|
13
|
-
from
|
|
12
|
+
from .message import send_QQEmail
|
|
14
13
|
|
|
15
14
|
|
|
16
15
|
def is_gpu_idle():
|
|
16
|
+
try:
|
|
17
|
+
import GPUtil
|
|
18
|
+
except ImportError:
|
|
19
|
+
raise ImportError(
|
|
20
|
+
"GPUtil is required for is_gpu_idle(). "
|
|
21
|
+
"Please install it via `pip install checkGPU`."
|
|
22
|
+
)
|
|
17
23
|
try:
|
|
18
24
|
# 获取所有可见的GPU设备列表
|
|
19
25
|
gpus = GPUtil.getGPUs()
|
|
@@ -49,12 +55,14 @@ def get_gpu_utilization():
|
|
|
49
55
|
|
|
50
56
|
# 监控显卡利用率
|
|
51
57
|
def monitor_gpu_utilization(check_interval=5, duration_limit=300, threshold=20,
|
|
52
|
-
MonitorGPUs=[0, 1, 2, 3, 4, 5, 6, 7]
|
|
58
|
+
MonitorGPUs=[0, 1, 2, 3, 4, 5, 6, 7],
|
|
59
|
+
from_email="xxxxx@qq.com",
|
|
60
|
+
from_password="xxxxxxx",
|
|
61
|
+
to_email="xxxxx@qq.com"):
|
|
53
62
|
"""
|
|
54
63
|
check_interval = 5 每5s检查一次
|
|
55
64
|
duration_limit = 300 检查300/60=5min
|
|
56
65
|
threshold = 20 # 利用率阈值
|
|
57
|
-
|
|
58
66
|
Returns:
|
|
59
67
|
"""
|
|
60
68
|
alarm_times = 0 # 报警次数
|
|
@@ -91,10 +99,10 @@ def monitor_gpu_utilization(check_interval=5, duration_limit=300, threshold=20,
|
|
|
91
99
|
if t_now - last_alarm_time > 3600.0:
|
|
92
100
|
send_QQEmail(title=f"GPU利用率警告",
|
|
93
101
|
content=f"GPU 利用率低于 {threshold}% 在 {duration_limit} 秒内. "
|
|
94
|
-
|
|
95
|
-
from_email=
|
|
96
|
-
from_password=
|
|
97
|
-
to_email=
|
|
102
|
+
f"host ip: {host_ip}, 当前GPU利用率为:{utilization_mean}.",
|
|
103
|
+
from_email=from_email,
|
|
104
|
+
from_password=from_password,
|
|
105
|
+
to_email=to_email)
|
|
98
106
|
|
|
99
107
|
alarm_times += 1
|
|
100
108
|
last_alarm_time = time.time()
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from lazy_loader import attach
|
|
2
|
+
import importlib
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def attach_and_expose_all(pkg_name: str, g: dict, submodules, submod_attrs=None):
|
|
6
|
+
submod_attrs = submod_attrs or {}
|
|
7
|
+
_getattr, _dir, _all = attach(pkg_name, submodules=submodules, submod_attrs=submod_attrs)
|
|
8
|
+
_all_set = set(_all)
|
|
9
|
+
|
|
10
|
+
def __getattr__(name: str):
|
|
11
|
+
try:
|
|
12
|
+
return _getattr(name)
|
|
13
|
+
except AttributeError:
|
|
14
|
+
for mod in submodules:
|
|
15
|
+
m = importlib.import_module(f"{pkg_name}.{mod}")
|
|
16
|
+
if hasattr(m, name) and not name.startswith("_"):
|
|
17
|
+
obj = getattr(m, name)
|
|
18
|
+
g[name] = obj
|
|
19
|
+
_all_set.add(name)
|
|
20
|
+
return obj
|
|
21
|
+
raise
|
|
22
|
+
|
|
23
|
+
def __dir__():
|
|
24
|
+
# 避免为补全而导入所有子模块,保持冷启动轻量
|
|
25
|
+
# 仅返回 attach 提供的名称 + 已经懒暴露过的名称
|
|
26
|
+
return sorted(set(_dir()) | _all_set)
|
|
27
|
+
|
|
28
|
+
g["__all__"] = sorted(_all_set)
|
|
29
|
+
return __getattr__, __dir__, g["__all__"]
|