neverlib 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- neverlib/.claude/settings.local.json +9 -0
- neverlib/Docs/audio_aug/test_volume.ipynb +416 -0
- neverlib/Docs/audio_aug_test/test_volume.ipynb +289 -0
- neverlib/Docs/filter/biquad.ipynb +129 -0
- neverlib/Docs/filter/filter_family.ipynb +450 -0
- neverlib/Docs/filter/highpass.ipynb +139 -0
- neverlib/Docs/filter/scipy_filter_family.ipynb +110 -0
- neverlib/Docs/vad/VAD_Energy.ipynb +167 -0
- neverlib/Docs/vad/VAD_Silero.ipynb +325 -0
- neverlib/Docs/vad/VAD_WebRTC.ipynb +189 -0
- neverlib/Docs/vad/VAD_funasr.ipynb +192 -0
- neverlib/Docs/vad/VAD_rvADfast.ipynb +162 -0
- neverlib/Docs/vad/VAD_statistics.ipynb +532 -0
- neverlib/Docs/vad/VAD_tenVAD.ipynb +292 -0
- neverlib/Docs/vad/VAD_vadlib.ipynb +168 -0
- neverlib/Docs/vad/VAD_whisper.ipynb +404 -0
- neverlib/QA/gen_init.py +117 -0
- neverlib/QA/get_fun.py +19 -0
- neverlib/__init__.py +21 -4
- neverlib/audio_aug/HarmonicDistortion.py +19 -13
- neverlib/audio_aug/__init__.py +30 -12
- neverlib/audio_aug/audio_aug.py +19 -14
- neverlib/audio_aug/clip_aug.py +15 -18
- neverlib/audio_aug/coder_aug.py +44 -24
- neverlib/audio_aug/coder_aug2.py +54 -37
- neverlib/audio_aug/loss_packet_aug.py +7 -7
- neverlib/audio_aug/quant_aug.py +19 -17
- neverlib/data/000_short_enhance.wav +0 -0
- neverlib/data/3956_speech.wav +0 -0
- neverlib/data/3956_sweep.wav +0 -0
- neverlib/data/vad_example.wav +0 -0
- neverlib/data/white.wav +0 -0
- neverlib/data/white_EQ.wav +0 -0
- neverlib/data/white_matched.wav +0 -0
- neverlib/data_analyze/__init__.py +25 -20
- neverlib/data_analyze/dataset_analyzer.py +109 -114
- neverlib/data_analyze/quality_metrics.py +87 -89
- neverlib/data_analyze/rms_distrubution.py +23 -42
- neverlib/data_analyze/spectral_analysis.py +43 -46
- neverlib/data_analyze/statistics.py +76 -76
- neverlib/data_analyze/temporal_features.py +15 -6
- neverlib/data_analyze/visualization.py +208 -144
- neverlib/filter/__init__.py +17 -20
- neverlib/filter/auto_eq/__init__.py +18 -35
- neverlib/filter/auto_eq/de_eq.py +0 -2
- neverlib/filter/common.py +24 -5
- neverlib/metrics/DNSMOS/bak_ovr.onnx +0 -0
- neverlib/metrics/DNSMOS/model_v8.onnx +0 -0
- neverlib/metrics/DNSMOS/sig.onnx +0 -0
- neverlib/metrics/DNSMOS/sig_bak_ovr.onnx +0 -0
- neverlib/metrics/__init__.py +23 -0
- neverlib/metrics/dnsmos.py +4 -15
- neverlib/metrics/pDNSMOS/sig_bak_ovr.onnx +0 -0
- neverlib/metrics/pesq_c/PESQ +0 -0
- neverlib/metrics/pesq_c/dsp.c +553 -0
- neverlib/metrics/pesq_c/dsp.h +138 -0
- neverlib/metrics/pesq_c/pesq.h +294 -0
- neverlib/metrics/pesq_c/pesqdsp.c +1047 -0
- neverlib/metrics/pesq_c/pesqio.c +392 -0
- neverlib/metrics/pesq_c/pesqmain.c +610 -0
- neverlib/metrics/pesq_c/pesqmod.c +1417 -0
- neverlib/metrics/pesq_c/pesqpar.h +297 -0
- neverlib/metrics/snr.py +5 -1
- neverlib/metrics/spec.py +31 -21
- neverlib/metrics/test_pesq.py +0 -4
- neverlib/tests/test_imports.py +17 -0
- neverlib/utils/__init__.py +26 -15
- neverlib/utils/audio_split.py +5 -1
- neverlib/utils/checkGPU.py +17 -9
- neverlib/utils/lazy_expose.py +29 -0
- neverlib/utils/utils.py +40 -12
- neverlib/vad/__init__.py +33 -25
- neverlib/vad/class_get_speech.py +1 -1
- neverlib/vad/class_vad.py +3 -3
- neverlib/vad/img.png +0 -0
- {neverlib-0.2.5.dist-info → neverlib-0.2.7.dist-info}/METADATA +20 -17
- {neverlib-0.2.5.dist-info → neverlib-0.2.7.dist-info}/RECORD +80 -37
- {neverlib-0.2.5.dist-info → neverlib-0.2.7.dist-info}/WHEEL +0 -0
- {neverlib-0.2.5.dist-info → neverlib-0.2.7.dist-info}/licenses/LICENSE +0 -0
- {neverlib-0.2.5.dist-info → neverlib-0.2.7.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
/*****************************************************************************
|
|
2
|
+
|
|
3
|
+
Perceptual Evaluation of Speech Quality (PESQ)
|
|
4
|
+
ITU-T Recommendation P.862.
|
|
5
|
+
Version 1.2 - 2 August 2002.
|
|
6
|
+
|
|
7
|
+
****************************************
|
|
8
|
+
PESQ Intellectual Property Rights Notice
|
|
9
|
+
****************************************
|
|
10
|
+
|
|
11
|
+
DEFINITIONS:
|
|
12
|
+
------------
|
|
13
|
+
For the purposes of this Intellectual Property Rights Notice
|
|
14
|
+
the terms "Perceptual Evaluation of Speech Quality Algorithm"
|
|
15
|
+
and "PESQ Algorithm" refer to the objective speech quality
|
|
16
|
+
measurement algorithm defined in ITU-T Recommendation P.862;
|
|
17
|
+
the term "PESQ Software" refers to the C-code component of P.862.
|
|
18
|
+
|
|
19
|
+
NOTICE:
|
|
20
|
+
-------
|
|
21
|
+
All copyright, trade marks, trade names, patents, know-how and
|
|
22
|
+
all or any other intellectual rights subsisting in or used in
|
|
23
|
+
connection with including all algorithms, documents and manuals
|
|
24
|
+
relating to the PESQ Algorithm and or PESQ Software are and remain
|
|
25
|
+
the sole property in law, ownership, regulations, treaties and
|
|
26
|
+
patent rights of the Owners identified below. The user may not
|
|
27
|
+
dispute or question the ownership of the PESQ Algorithm and
|
|
28
|
+
or PESQ Software.
|
|
29
|
+
|
|
30
|
+
OWNERS ARE:
|
|
31
|
+
-----------
|
|
32
|
+
|
|
33
|
+
1. British Telecommunications plc (BT), all rights assigned
|
|
34
|
+
to Psytechnics Limited
|
|
35
|
+
2. Royal KPN NV, all rights assigned to OPTICOM GmbH
|
|
36
|
+
|
|
37
|
+
RESTRICTIONS:
|
|
38
|
+
-------------
|
|
39
|
+
|
|
40
|
+
The user cannot:
|
|
41
|
+
|
|
42
|
+
1. alter, duplicate, modify, adapt, or translate in whole or in
|
|
43
|
+
part any aspect of the PESQ Algorithm and or PESQ Software
|
|
44
|
+
2. sell, hire, loan, distribute, dispose or put to any commercial
|
|
45
|
+
use other than those permitted below in whole or in part any
|
|
46
|
+
aspect of the PESQ Algorithm and or PESQ Software
|
|
47
|
+
|
|
48
|
+
PERMITTED USE:
|
|
49
|
+
--------------
|
|
50
|
+
|
|
51
|
+
The user may:
|
|
52
|
+
|
|
53
|
+
1. Use the PESQ Software to:
|
|
54
|
+
i) understand the PESQ Algorithm; or
|
|
55
|
+
ii) evaluate the ability of the PESQ Algorithm to perform
|
|
56
|
+
its intended function of predicting the speech quality
|
|
57
|
+
of a system; or
|
|
58
|
+
iii) evaluate the computational complexity of the PESQ Algorithm,
|
|
59
|
+
with the limitation that none of said evaluations or its
|
|
60
|
+
results shall be used for external commercial use.
|
|
61
|
+
|
|
62
|
+
2. Use the PESQ Software to test if an implementation of the PESQ
|
|
63
|
+
Algorithm conforms to ITU-T Recommendation P.862.
|
|
64
|
+
|
|
65
|
+
3. With the prior written permission of both Psytechnics Limited
|
|
66
|
+
and OPTICOM GmbH, use the PESQ Software in accordance with the
|
|
67
|
+
above Restrictions to perform work that meets all of the following
|
|
68
|
+
criteria:
|
|
69
|
+
i) the work must contribute directly to the maintenance of an
|
|
70
|
+
existing ITU recommendation or the development of a new ITU
|
|
71
|
+
recommendation under an approved ITU Study Item; and
|
|
72
|
+
ii) the work and its results must be fully described in a
|
|
73
|
+
written contribution to the ITU that is presented at a formal
|
|
74
|
+
ITU meeting within one year of the start of the work; and
|
|
75
|
+
iii) neither the work nor its results shall be put to any
|
|
76
|
+
commercial use other than making said contribution to the ITU.
|
|
77
|
+
Said permission will be provided on a case-by-case basis.
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
ANY OTHER USE OR APPLICATION OF THE PESQ SOFTWARE AND/OR THE PESQ
|
|
81
|
+
ALGORITHM WILL REQUIRE A PESQ LICENCE AGREEMENT, WHICH MAY BE OBTAINED
|
|
82
|
+
FROM EITHER OPTICOM GMBH OR PSYTECHNICS LIMITED.
|
|
83
|
+
|
|
84
|
+
EACH COMPANY OFFERS OEM LICENSE AGREEMENTS, WHICH COMBINE OEM
|
|
85
|
+
IMPLEMENTATIONS OF THE PESQ ALGORITHM TOGETHER WITH A PESQ PATENT LICENSE
|
|
86
|
+
AGREEMENT. PESQ PATENT-ONLY LICENSE AGREEMENTS MAY BE OBTAINED FROM OPTICOM.
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
***********************************************************************
|
|
90
|
+
* OPTICOM GmbH * Psytechnics Limited *
|
|
91
|
+
* Am Weichselgarten 7, * Fraser House, 23 Museum Street, *
|
|
92
|
+
* D- 91058 Erlangen, Germany * Ipswich IP1 1HN, England *
|
|
93
|
+
* Phone: +49 (0) 9131 691 160 * Phone: +44 (0) 1473 261 800 *
|
|
94
|
+
* Fax: +49 (0) 9131 691 325 * Fax: +44 (0) 1473 261 880 *
|
|
95
|
+
* E-mail: info@opticom.de, * E-mail: info@psytechnics.com, *
|
|
96
|
+
* www.opticom.de * www.psytechnics.com *
|
|
97
|
+
***********************************************************************
|
|
98
|
+
|
|
99
|
+
Further information is also available from www.pesq.org
|
|
100
|
+
|
|
101
|
+
*****************************************************************************/
|
|
102
|
+
|
|
103
|
+
#include <string.h>
|
|
104
|
+
#include <stdlib.h>
|
|
105
|
+
|
|
106
|
+
#ifndef TRUE
|
|
107
|
+
#define TRUE 1
|
|
108
|
+
#endif
|
|
109
|
+
|
|
110
|
+
#ifndef FALSE
|
|
111
|
+
#define FALSE 0
|
|
112
|
+
#endif
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
#define LINIIR 60
|
|
116
|
+
|
|
117
|
+
#define MAXNUTTERANCES 50
|
|
118
|
+
|
|
119
|
+
#define WHOLE_SIGNAL -1
|
|
120
|
+
|
|
121
|
+
#define LINIIR 60 /* NOTE(review): repeats the identical LINIIR definition made earlier in this header; redundant, but accepted by the preprocessor because both replacement lists are the same. */
|
|
122
|
+
#define LSMJ 20
|
|
123
|
+
#define LFBANK 35
|
|
124
|
+
|
|
125
|
+
#define DATAPADDING_MSECS 320
|
|
126
|
+
#define SEARCHBUFFER 75
|
|
127
|
+
|
|
128
|
+
#define EPS 1E-12
|
|
129
|
+
|
|
130
|
+
#define MINSPEECHLGTH 4
|
|
131
|
+
|
|
132
|
+
#define JOINSPEECHLGTH 50
|
|
133
|
+
|
|
134
|
+
#define MINUTTLENGTH 50
|
|
135
|
+
|
|
136
|
+
#define SATDB 90.31
|
|
137
|
+
#define FIXDB -32.0
|
|
138
|
+
|
|
139
|
+
#define TWOPI 6.28318530717959
|
|
140
|
+
|
|
141
|
+
int Nb ; /* NOTE(review): unlike the other globals in this header, Nb is not declared extern, so each translation unit including this file gets its own tentative definition; toolchains that do not merge common symbols (e.g. -fno-common) may report a duplicate-symbol link error. Consider `extern int Nb;` plus a single definition in one .c file - confirm against the build first. */
|
|
142
|
+
|
|
143
|
+
#define Nfmax 512
|
|
144
|
+
|
|
145
|
+
#define Sp_8k 2.764344e-5
|
|
146
|
+
#define Sl_8k 1.866055e-1
|
|
147
|
+
|
|
148
|
+
#define Sp_16k 6.910853e-006
|
|
149
|
+
#define Sl_16k 1.866055e-001
|
|
150
|
+
|
|
151
|
+
extern float Sp;
|
|
152
|
+
extern float Sl;
|
|
153
|
+
|
|
154
|
+
#define Dz 0.312
|
|
155
|
+
|
|
156
|
+
#define gamma 0.001 /* NOTE(review): lowercase object-like macro; it rewrites every later identifier spelled `gamma`, including declarations in headers included after this one - confirm include order. */
|
|
157
|
+
|
|
158
|
+
#define Tl 10000.0f
|
|
159
|
+
|
|
160
|
+
#define Ts 10000000.0f
|
|
161
|
+
|
|
162
|
+
#define Tt 0.02f
|
|
163
|
+
|
|
164
|
+
#define Tn 0.01f
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
#ifndef min /* only defined when no earlier header or macro already provides min */
|
|
168
|
+
#define min(a,b) (((a) < (b)) ? (a) : (b)) /* yields the smaller operand; the selected argument is expanded twice, so do not pass expressions with side effects */
|
|
169
|
+
#endif
|
|
170
|
+
|
|
171
|
+
#ifndef max /* only defined when no earlier header or macro already provides max */
|
|
172
|
+
#define max(a,b) (((a) > (b)) ? (a) : (b)) /* yields the larger operand; the selected argument is expanded twice, so do not pass expressions with side effects */
|
|
173
|
+
#endif
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
typedef struct { /* Record describing one signal; the prototypes in this header take one for the reference (ref_info) and one for the degraded (deg_info) signal. */
|
|
177
|
+
char path_name[512]; /* fixed 512-byte buffer; presumably the full path of the input file - confirm against the loader */
|
|
178
|
+
char file_name [128]; /* fixed 128-byte buffer; presumably the bare file name - confirm */
|
|
179
|
+
long Nsamples; /* sample count; presumably the number of floats reachable through data - confirm */
|
|
180
|
+
long apply_swap; /* presumably a byte-order swap flag used when reading samples - confirm */
|
|
181
|
+
|
|
182
|
+
float * data; /* sample buffer; allocation and ownership are not visible in this header */
|
|
183
|
+
float * VAD; /* voice-activity buffer (see calc_VAD / apply_VAD prototypes); length not visible here */
|
|
184
|
+
float * logVAD; /* companion buffer passed to apply_VAD alongside VAD; presumably a log-domain version - confirm */
|
|
185
|
+
} SIGNAL_INFO;
|
|
186
|
+
|
|
187
|
+
typedef struct { /* Alignment and scoring state passed (as err_info) to the utterance, alignment and PESQ model prototypes in this header. */
|
|
188
|
+
long Nutterances; /* presumably the number of valid entries in the per-utterance arrays below; must not exceed MAXNUTTERANCES - confirm */
|
|
189
|
+
long Largest_uttsize; /* presumably the size of the longest utterance - units not visible here */
|
|
190
|
+
long Nsurf_samples; /* NOTE(review): meaning not established by this header - confirm in the model code */
|
|
191
|
+
|
|
192
|
+
long Crude_DelayEst; /* whole-signal delay estimate; presumably written by crude_align - confirm */
|
|
193
|
+
float Crude_DelayConf; /* confidence value paired with Crude_DelayEst */
|
|
194
|
+
long UttSearch_Start[MAXNUTTERANCES]; /* per-utterance arrays from here on: capacity MAXNUTTERANCES (50) entries each */
|
|
195
|
+
long UttSearch_End[MAXNUTTERANCES]; /* end of each utterance search window; pairs with UttSearch_Start */
|
|
196
|
+
long Utt_DelayEst[MAXNUTTERANCES]; /* per-utterance delay estimate */
|
|
197
|
+
long Utt_Delay[MAXNUTTERANCES]; /* per-utterance delay; presumably the refined value derived from Utt_DelayEst - confirm */
|
|
198
|
+
float Utt_DelayConf[MAXNUTTERANCES]; /* per-utterance delay confidence */
|
|
199
|
+
long Utt_Start[MAXNUTTERANCES]; /* per-utterance start position; units not visible here */
|
|
200
|
+
long Utt_End[MAXNUTTERANCES]; /* per-utterance end position; pairs with Utt_Start */
|
|
201
|
+
|
|
202
|
+
float pesq_mos; /* presumably the PESQ score produced by the model - confirm */
|
|
203
|
+
float subj_mos; /* presumably a subjective MOS supplied for comparison - confirm */
|
|
204
|
+
int cond_nr; /* presumably a test-condition number - confirm */
|
|
205
|
+
} ERROR_INFO;
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
extern long Fs;
|
|
209
|
+
extern long Downsample;
|
|
210
|
+
extern float * InIIR_Hsos;
|
|
211
|
+
extern long Align_Nfft;
|
|
212
|
+
|
|
213
|
+
extern long Fs_8k;
|
|
214
|
+
extern long Downsample_8k;
|
|
215
|
+
extern long InIIR_Nsos_8k;
|
|
216
|
+
extern long Align_Nfft_8k;
|
|
217
|
+
|
|
218
|
+
extern long Fs_16k;
|
|
219
|
+
extern long Downsample_16k;
|
|
220
|
+
extern long InIIR_Nsos_16k;
|
|
221
|
+
extern long Align_Nfft_16k;
|
|
222
|
+
|
|
223
|
+
extern float * InIIR_Hsos; /* NOTE(review): repeats the identical extern declaration made earlier in this header; redundant but legal. */
|
|
224
|
+
|
|
225
|
+
void input_filter(
|
|
226
|
+
SIGNAL_INFO * ref_info, SIGNAL_INFO * deg_info, float * ftmp );
|
|
227
|
+
void apply_filters( float * data, long Nsamples );
|
|
228
|
+
void make_stereo_file (char *, SIGNAL_INFO *, SIGNAL_INFO *);
|
|
229
|
+
void make_stereo_file2 (char *, SIGNAL_INFO *, float *);
|
|
230
|
+
void select_rate( long sample_rate,
|
|
231
|
+
long * Error_Flag, char ** Error_Type );
|
|
232
|
+
int file_exist( char * fname );
|
|
233
|
+
void load_src( long * Error_Flag, char ** Error_Type,
|
|
234
|
+
SIGNAL_INFO * sinfo);
|
|
235
|
+
void alloc_other( SIGNAL_INFO * ref_info, SIGNAL_INFO * deg_info,
|
|
236
|
+
long * Error_Flag, char ** Error_Type, float ** ftmp);
|
|
237
|
+
void calc_VAD( SIGNAL_INFO * pinfo );
|
|
238
|
+
int id_searchwindows( SIGNAL_INFO * ref_info, SIGNAL_INFO * deg_info,
|
|
239
|
+
ERROR_INFO * err_info );
|
|
240
|
+
void id_utterances( SIGNAL_INFO * ref_info, SIGNAL_INFO * deg_info,
|
|
241
|
+
ERROR_INFO * err_info );
|
|
242
|
+
void utterance_split( SIGNAL_INFO * ref_info, SIGNAL_INFO * deg_info,
|
|
243
|
+
ERROR_INFO * err_info, float * ftmp );
|
|
244
|
+
void utterance_locate( SIGNAL_INFO * ref_info, SIGNAL_INFO * deg_info,
|
|
245
|
+
ERROR_INFO * err_info, float * ftmp );
|
|
246
|
+
void auditory_transform( SIGNAL_INFO * ref_info, SIGNAL_INFO * deg_info,
|
|
247
|
+
ERROR_INFO * err_info, long Utt_id, float * ftmp);
|
|
248
|
+
void calc_err( SIGNAL_INFO * ref_info, SIGNAL_INFO * deg_info,
|
|
249
|
+
ERROR_INFO * err_info, long Utt_id);
|
|
250
|
+
void extract_params( SIGNAL_INFO * ref_info, SIGNAL_INFO * deg_info,
|
|
251
|
+
ERROR_INFO * err_info, long Utt_id, float * ftmp );
|
|
252
|
+
void utterance_process(SIGNAL_INFO * ref_info, SIGNAL_INFO * deg_info,
|
|
253
|
+
ERROR_INFO * err_info, long Utt_id, float * ftmp);
|
|
254
|
+
void DC_block( float * data, long Nsamples );
|
|
255
|
+
void apply_filter ( float * data, long Nsamples, int, double [][2] );
|
|
256
|
+
double pow_of (const float * const , long , long, long);
|
|
257
|
+
void apply_VAD(
|
|
258
|
+
SIGNAL_INFO * pinfo, float * data, float * VAD, float * logVAD );
|
|
259
|
+
void crude_align(
|
|
260
|
+
SIGNAL_INFO * ref_info, SIGNAL_INFO * deg_info, ERROR_INFO * err_info,
|
|
261
|
+
long Utt_id, float * ftmp);
|
|
262
|
+
void time_align(
|
|
263
|
+
SIGNAL_INFO * ref_info, SIGNAL_INFO * deg_info, ERROR_INFO * err_info,
|
|
264
|
+
long Utt_id, float * ftmp );
|
|
265
|
+
void split_align( SIGNAL_INFO * ref_info, SIGNAL_INFO * deg_info,
|
|
266
|
+
ERROR_INFO * err_info, float * ftmp,
|
|
267
|
+
long Utt_Start, long Utt_SpeechStart, long Utt_SpeechEnd, long Utt_End,
|
|
268
|
+
long Utt_DelayEst, float Utt_DelayConf,
|
|
269
|
+
long * Best_ED1, long * Best_D1, float * Best_DC1,
|
|
270
|
+
long * Best_ED2, long * Best_D2, float * Best_DC2,
|
|
271
|
+
long * Best_BP );
|
|
272
|
+
void pesq_psychoacoustic_model(
|
|
273
|
+
SIGNAL_INFO * ref_info, SIGNAL_INFO * deg_info,
|
|
274
|
+
ERROR_INFO * err_info, float * ftmp);
|
|
275
|
+
void apply_pesq( float * x_data, float * ref_surf,
|
|
276
|
+
float * y_data, float * deg_surf, long NVAD_windows, float * ftmp,
|
|
277
|
+
ERROR_INFO * err_info );
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
#define D_POW_F 2
|
|
282
|
+
#define D_POW_S 6
|
|
283
|
+
#define D_POW_T 2
|
|
284
|
+
|
|
285
|
+
#define A_POW_F 1
|
|
286
|
+
#define A_POW_S 6
|
|
287
|
+
#define A_POW_T 2
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
#define D_WEIGHT 0.1
|
|
291
|
+
#define A_WEIGHT 0.0309
|
|
292
|
+
|
|
293
|
+
/* END OF FILE */
|
|
294
|
+
|