neverlib 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- neverlib/__init__.py +2 -2
- neverlib/audio_aug/__init__.py +1 -1
- neverlib/audio_aug/audio_aug.py +4 -5
- neverlib/dataAnalyze/README.md +234 -0
- neverlib/dataAnalyze/__init__.py +87 -0
- neverlib/dataAnalyze/dataset_analyzer.py +590 -0
- neverlib/dataAnalyze/quality_metrics.py +364 -0
- neverlib/dataAnalyze/rms_distrubution.py +62 -0
- neverlib/dataAnalyze/spectral_analysis.py +218 -0
- neverlib/dataAnalyze/statistics.py +406 -0
- neverlib/dataAnalyze/temporal_features.py +126 -0
- neverlib/dataAnalyze/visualization.py +468 -0
- neverlib/filter/AudoEQ/README.md +165 -0
- neverlib/filter/AudoEQ/auto_eq_de.py +361 -0
- neverlib/filter/AudoEQ/auto_eq_ga_advanced.py +577 -0
- neverlib/filter/AudoEQ/auto_eq_ga_basic.py +380 -0
- neverlib/filter/AudoEQ/auto_eq_spectral_direct.py +75 -0
- neverlib/filter/README.md +101 -0
- neverlib/filter/__init__.py +7 -0
- neverlib/filter/biquad.py +45 -0
- neverlib/filter/common.py +5 -6
- neverlib/filter/core.py +339 -0
- neverlib/metrics/dnsmos.py +160 -0
- neverlib/metrics/snr.py +177 -0
- neverlib/metrics/spec.py +45 -0
- neverlib/metrics/test_pesq.py +35 -0
- neverlib/metrics/time.py +68 -0
- neverlib/tests/test_vad.py +21 -0
- neverlib/utils/audio_split.py +5 -3
- neverlib/utils/message.py +4 -4
- neverlib/utils/utils.py +32 -15
- neverlib/vad/PreProcess.py +1 -1
- neverlib/vad/README.md +10 -10
- neverlib/vad/VAD_Energy.py +1 -1
- neverlib/vad/VAD_Silero.py +1 -1
- neverlib/vad/VAD_WebRTC.py +1 -1
- neverlib/vad/VAD_funasr.py +1 -1
- neverlib/vad/VAD_statistics.py +3 -3
- neverlib/vad/VAD_vadlib.py +2 -2
- neverlib/vad/VAD_whisper.py +1 -1
- neverlib/vad/__init__.py +1 -1
- neverlib/vad/class_get_speech.py +4 -4
- neverlib/vad/class_vad.py +1 -1
- neverlib/vad/utils.py +47 -5
- {neverlib-0.2.1.dist-info → neverlib-0.2.3.dist-info}/METADATA +120 -120
- neverlib-0.2.3.dist-info/RECORD +53 -0
- {neverlib-0.2.1.dist-info → neverlib-0.2.3.dist-info}/WHEEL +1 -1
- neverlib/Documents/vad/VAD_Energy.ipynb +0 -159
- neverlib/Documents/vad/VAD_Silero.ipynb +0 -305
- neverlib/Documents/vad/VAD_WebRTC.ipynb +0 -183
- neverlib/Documents/vad/VAD_funasr.ipynb +0 -179
- neverlib/Documents/vad/VAD_ppasr.ipynb +0 -175
- neverlib/Documents/vad/VAD_statistics.ipynb +0 -522
- neverlib/Documents/vad/VAD_vadlib.ipynb +0 -184
- neverlib/Documents/vad/VAD_whisper.ipynb +0 -430
- neverlib/utils/waveform_analyzer.py +0 -51
- neverlib/wav_data/000_short.wav +0 -0
- neverlib-0.2.1.dist-info/RECORD +0 -40
- {neverlib-0.2.1.dist-info → neverlib-0.2.3.dist-info}/licenses/LICENSE +0 -0
- {neverlib-0.2.1.dist-info → neverlib-0.2.3.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,406 @@
|
|
|
1
|
+
"""
|
|
2
|
+
统计分析工具模块
|
|
3
|
+
Statistics Analysis Module
|
|
4
|
+
|
|
5
|
+
提供音频数据集统计分析功能
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
import librosa
|
|
10
|
+
import os
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import List, Dict, Tuple, Optional, Union
|
|
13
|
+
import matplotlib.pyplot as plt
|
|
14
|
+
from collections import defaultdict
|
|
15
|
+
import json
|
|
16
|
+
from .utils import rms_amplitude, dB
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class AudioStatistics:
    """Collect audio clips in memory and compute dataset-level statistics.

    Clips are kept in ``audio_data`` next to the path they were registered
    under in ``file_paths`` (same index). The ``compute_*`` methods summarise
    whatever has been added so far; ``compute_all_statistics`` stores its
    report in ``statistics`` so ``export_statistics`` can write it out.
    """

    def __init__(self, sr: int = 22050):
        """Create an empty analyzer.

        Args:
            sr: Sample rate handed to ``librosa.load`` and the librosa
                spectral features, and used to turn sample counts into
                seconds.
        """
        self.sr = sr
        self.audio_data = []
        self.file_paths = []
        self.statistics = {}

    @staticmethod
    def _basic_summary(values) -> Dict:
        """Return the mean/std/min/max of *values* as a dict."""
        return {
            'mean': np.mean(values),
            'std': np.std(values),
            'min': np.min(values),
            'max': np.max(values)
        }

    def add_audio_file(self, file_path: str, audio_data: Optional[np.ndarray] = None):
        """Register one clip for analysis.

        Args:
            file_path: Path of the audio file; stored as the clip's label.
            audio_data: Already-decoded samples. When omitted the file is
                loaded with ``librosa.load(file_path, sr=self.sr)``.

        A file that fails to load is reported with ``print`` and skipped, so
        one bad file does not abort a batch import.
        """
        if audio_data is None:
            try:
                audio_data, _ = librosa.load(file_path, sr=self.sr)
            except Exception as e:
                # Deliberate best-effort: report and carry on with the rest.
                print(f"Error loading {file_path}: {e}")
                return

        self.audio_data.append(audio_data)
        self.file_paths.append(file_path)

    def add_audio_directory(self, directory: str, extensions: List[str] = None):
        """Register every matching file found directly inside *directory*.

        Args:
            directory: Directory to scan (not recursive: the glob pattern is
                ``*<ext>``).
            extensions: File suffixes to accept, including the leading dot.
                Defaults to ``.wav``, ``.mp3``, ``.flac``, ``.m4a``, ``.aac``.
        """
        if extensions is None:
            extensions = ['.wav', '.mp3', '.flac', '.m4a', '.aac']

        directory = Path(directory)
        for ext in extensions:
            for file_path in directory.glob(f'*{ext}'):
                self.add_audio_file(str(file_path))

    def compute_duration_statistics(self) -> Dict:
        """Summarise clip durations in seconds.

        Returns:
            Dict with ``count``, ``total_duration``, mean/median/std/min/max
            and the 25th/75th/90th/95th percentiles, or ``{}`` when no clip
            has been added.
        """
        durations = [len(audio) / self.sr for audio in self.audio_data]

        if not durations:
            return {}

        return {
            'count': len(durations),
            'total_duration': sum(durations),
            'mean_duration': np.mean(durations),
            'median_duration': np.median(durations),
            'std_duration': np.std(durations),
            'min_duration': np.min(durations),
            'max_duration': np.max(durations),
            'percentiles': {
                '25th': np.percentile(durations, 25),
                '75th': np.percentile(durations, 75),
                '90th': np.percentile(durations, 90),
                '95th': np.percentile(durations, 95)
            }
        }

    def compute_amplitude_statistics(self) -> Dict:
        """Summarise sample magnitudes, per-clip peaks and per-clip RMS.

        Returns:
            Dict with three sections: ``overall`` (statistics over the
            absolute value of every sample of every clip), ``peak_amplitudes``
            (one peak per clip) and ``rms_values`` (one ``rms_amplitude``
            result per clip, plus ``mean_db`` / ``std_db``). ``{}`` when no
            clip has been added.
        """
        if not self.audio_data:
            return {}

        magnitude_chunks = []
        max_amplitudes = []
        rms_values = []
        for audio in self.audio_data:
            magnitudes = np.abs(audio)
            magnitude_chunks.append(np.ravel(magnitudes))
            max_amplitudes.append(np.max(magnitudes))
            rms_values.append(rms_amplitude(audio))

        # One contiguous float64 array instead of a Python list holding one
        # float object per sample; float64 matches the precision the former
        # list-based implementation computed in.
        all_amplitudes = np.concatenate(magnitude_chunks).astype(np.float64)

        overall = self._basic_summary(all_amplitudes)
        overall['percentiles'] = {
            '50th': np.percentile(all_amplitudes, 50),
            '90th': np.percentile(all_amplitudes, 90),
            '95th': np.percentile(all_amplitudes, 95),
            '99th': np.percentile(all_amplitudes, 99)
        }

        rms_stats = self._basic_summary(rms_values)
        rms_stats['mean_db'] = dB(rms_stats['mean'])
        rms_stats['std_db'] = np.std([dB(rms) for rms in rms_values])

        return {
            'overall': overall,
            'peak_amplitudes': self._basic_summary(max_amplitudes),
            'rms_values': rms_stats
        }

    def compute_frequency_statistics(self) -> Dict:
        """Summarise frame-wise spectral centroid, bandwidth and roll-off.

        Each feature is computed per clip with librosa (first row of the
        returned array) and the frames of all clips are pooled before the
        mean/std/min/max are taken.

        Returns:
            Dict keyed by ``spectral_centroid``, ``spectral_bandwidth`` and
            ``spectral_rolloff``, or ``{}`` when no frame was produced.
        """
        spectral_centroids = []
        spectral_bandwidths = []
        spectral_rolloffs = []

        for audio in self.audio_data:
            centroid = librosa.feature.spectral_centroid(y=audio, sr=self.sr)[0]
            bandwidth = librosa.feature.spectral_bandwidth(y=audio, sr=self.sr)[0]
            rolloff = librosa.feature.spectral_rolloff(y=audio, sr=self.sr)[0]

            spectral_centroids.extend(centroid.tolist())
            spectral_bandwidths.extend(bandwidth.tolist())
            spectral_rolloffs.extend(rolloff.tolist())

        if not spectral_centroids:
            return {}

        return {
            'spectral_centroid': self._basic_summary(spectral_centroids),
            'spectral_bandwidth': self._basic_summary(spectral_bandwidths),
            'spectral_rolloff': self._basic_summary(spectral_rolloffs)
        }

    def detect_outliers(self, feature: str = 'duration', threshold: float = 2.0) -> List[Tuple[str, float]]:
        """Flag clips whose feature value lies far from the dataset mean.

        Args:
            feature: ``'duration'``, ``'max_amplitude'`` or ``'rms'``.
            threshold: A clip is reported when its absolute z-score is
                strictly greater than this value.

        Returns:
            ``[(file_path, feature_value), ...]`` for the flagged clips, in
            the order the clips were added; ``[]`` when no clip is loaded.

        Raises:
            ValueError: If *feature* is not one of the supported names.
        """
        if feature == 'duration':
            values = [len(audio) / self.sr for audio in self.audio_data]
        elif feature == 'max_amplitude':
            values = [np.max(np.abs(audio)) for audio in self.audio_data]
        elif feature == 'rms':
            values = [rms_amplitude(audio) for audio in self.audio_data]
        else:
            raise ValueError(f"Unknown feature: {feature}")

        if not values:
            # Nothing loaded: skip the mean/std of an empty array, which
            # would only emit RuntimeWarnings and produce NaNs.
            return []

        values = np.array(values)
        mean_val = np.mean(values)
        std_val = np.std(values)

        outliers = []
        for path, val in zip(self.file_paths, values):
            # The small constant keeps the division defined when all values
            # are identical (std == 0).
            z_score = abs(val - mean_val) / (std_val + 1e-10)
            if z_score > threshold:
                outliers.append((path, val))

        return outliers

    def generate_distribution_analysis(self) -> Dict:
        """Describe the distribution of per-clip duration, peak and RMS.

        Returns:
            Dict with ``duration_distribution``, ``amplitude_distribution``
            and ``rms_distribution``, each the result of
            ``_analyze_distribution``.
        """
        durations = [len(audio) / self.sr for audio in self.audio_data]
        peaks = [np.max(np.abs(audio)) for audio in self.audio_data]
        rms_levels = [rms_amplitude(audio) for audio in self.audio_data]

        return {
            'duration_distribution': self._analyze_distribution(durations),
            'amplitude_distribution': self._analyze_distribution(peaks),
            'rms_distribution': self._analyze_distribution(rms_levels)
        }

    def _analyze_distribution(self, values: List[float]) -> Dict:
        """Compute mean, std, skewness, excess kurtosis and a shape label.

        Args:
            values: Numeric values; a list or a NumPy array.

        Returns:
            Dict with ``mean``, ``std``, ``skewness``, ``kurtosis`` and
            ``distribution_type``, or ``{}`` for empty input.
        """
        if values is None:
            return {}

        values = np.asarray(values)
        # Test the size explicitly: the truth value of a multi-element array
        # is ambiguous and raises.
        if values.size == 0:
            return {}

        mean_val = np.mean(values)
        std_val = np.std(values)

        # Standardise once; the small constant guards a zero std.
        standardized = (values - mean_val) / (std_val + 1e-10)
        skewness = np.mean(standardized ** 3)
        kurtosis = np.mean(standardized ** 4) - 3  # excess kurtosis

        return {
            'mean': mean_val,
            'std': std_val,
            'skewness': skewness,
            'kurtosis': kurtosis,
            'distribution_type': self._classify_distribution(skewness, kurtosis)
        }

    def _classify_distribution(self, skewness: float, kurtosis: float) -> str:
        """Map skewness and excess kurtosis to a coarse shape label.

        Args:
            skewness: Skewness of the sample.
            kurtosis: Excess kurtosis of the sample.

        Returns:
            ``"approximately_normal"`` when both magnitudes are below 0.5;
            otherwise the skew label (``"right_skewed"`` / ``"left_skewed"``)
            if the skew reaches ±0.5, else the tail label
            (``"heavy_tailed"`` / ``"light_tailed"``). ``"unknown"`` is left
            for inputs where a NaN makes every comparison fail.
        """
        if abs(skewness) < 0.5 and abs(kurtosis) < 0.5:
            return "approximately_normal"
        # Inclusive bounds: a value of exactly ±0.5 is not "approximately
        # normal" and must not fall through to "unknown".
        if skewness >= 0.5:
            return "right_skewed"
        if skewness <= -0.5:
            return "left_skewed"
        if kurtosis >= 0.5:
            return "heavy_tailed"
        if kurtosis <= -0.5:
            return "light_tailed"
        return "unknown"

    def compute_all_statistics(self) -> Dict:
        """Run every analysis and cache the combined report.

        Returns:
            The report dict, which is also stored in ``self.statistics``.
        """
        self.statistics = {
            'file_count': len(self.audio_data),
            'sample_rate': self.sr,
            'duration_stats': self.compute_duration_statistics(),
            'amplitude_stats': self.compute_amplitude_statistics(),
            'frequency_stats': self.compute_frequency_statistics(),
            'distribution_analysis': self.generate_distribution_analysis(),
            'outliers': {
                'duration': self.detect_outliers('duration'),
                'max_amplitude': self.detect_outliers('max_amplitude'),
                'rms': self.detect_outliers('rms')
            }
        }

        return self.statistics

    def export_statistics(self, output_path: str):
        """Write ``self.statistics`` to *output_path* as UTF-8 JSON.

        Args:
            output_path: Destination file path; the file is overwritten.

        NumPy arrays and scalars are converted to plain Python values first.
        Tuples are converted too (they become JSON lists), because the
        outlier lists hold ``(path, value)`` tuples whose value may be a
        NumPy scalar that ``json`` cannot serialise.
        """
        def convert_numpy(obj):
            if isinstance(obj, np.ndarray):
                return obj.tolist()
            if isinstance(obj, np.generic):
                # Any NumPy scalar (float, int, bool, ...) -> Python value.
                return obj.item()
            if isinstance(obj, dict):
                return {key: convert_numpy(value) for key, value in obj.items()}
            if isinstance(obj, (list, tuple)):
                return [convert_numpy(item) for item in obj]
            return obj

        stats_json = convert_numpy(self.statistics)

        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(stats_json, f, indent=2, ensure_ascii=False)
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
def quick_audio_stats(file_paths: List[str], sr: int = 22050) -> Dict:
    """Load the given files and return the full statistics report.

    Args:
        file_paths: Paths of the audio files to analyse.
        sr: Sample rate passed to the ``AudioStatistics`` analyzer.

    Returns:
        The dict produced by ``AudioStatistics.compute_all_statistics``.
    """
    stats_builder = AudioStatistics(sr=sr)
    for audio_path in file_paths:
        stats_builder.add_audio_file(audio_path)

    report = stats_builder.compute_all_statistics()
    return report
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
def compare_datasets(dataset1_paths: List[str], dataset2_paths: List[str],
                     sr: int = 22050) -> Dict:
    """Compute statistics for two sets of files and their differences.

    Args:
        dataset1_paths: File paths of the first dataset.
        dataset2_paths: File paths of the second dataset.
        sr: Sample rate used for both analyzers.

    Returns:
        Dict with ``dataset1`` and ``dataset2`` (the full report of each) and
        ``differences`` (dataset2 minus dataset1 for the file count, mean
        duration and mean RMS). A difference is ``None`` when either report
        lacks the value, which happens when a dataset is empty or none of
        its files could be loaded (its ``duration_stats`` /
        ``amplitude_stats`` are then ``{}``).
    """
    def _report(paths):
        # Build one analyzer per dataset and return its full report.
        analyzer = AudioStatistics(sr=sr)
        for path in paths:
            analyzer.add_audio_file(path)
        return analyzer.compute_all_statistics()

    def _lookup(stats, keys):
        # Walk nested dicts; None as soon as a level is missing.
        node = stats
        for key in keys:
            if not isinstance(node, dict) or key not in node:
                return None
            node = node[key]
        return node

    stats1 = _report(dataset1_paths)
    stats2 = _report(dataset2_paths)

    def _diff(*keys):
        first = _lookup(stats1, keys)
        second = _lookup(stats2, keys)
        if first is None or second is None:
            return None
        return second - first

    comparison = {
        'dataset1': stats1,
        'dataset2': stats2,
        'differences': {
            'file_count_diff': stats2['file_count'] - stats1['file_count'],
            'mean_duration_diff': _diff('duration_stats', 'mean_duration'),
            'mean_rms_diff': _diff('amplitude_stats', 'rms_values', 'mean')
        }
    }

    return comparison
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
'''
|
|
2
|
+
Author: 凌逆战 | Never
|
|
3
|
+
Date: 2025-08-05 01:36:09
|
|
4
|
+
Description:
|
|
5
|
+
时域特征分析模块
|
|
6
|
+
Temporal Features Analysis Module
|
|
7
|
+
|
|
8
|
+
提供音频时域特征提取和分析功能
|
|
9
|
+
'''
|
|
10
|
+
|
|
11
|
+
import warnings
|
|
12
|
+
from typing import Tuple, Optional, Union
|
|
13
|
+
from scipy import signal
|
|
14
|
+
import numpy as np
|
|
15
|
+
import librosa
|
|
16
|
+
# from neverlib.utils.utils import dB
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def dB(level):
    """Convert a linear amplitude to decibels.

    Args:
        level: Linear amplitude value(s).

    Returns:
        ``20 * log10(level + 1e-10)``.
    """
    floor = 1e-10  # keeps log10 finite when the amplitude is exactly zero
    shifted = level + floor
    return 20 * np.log10(shifted)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def peak_amplitude(wav):
    """Return the peak level of *wav* in dB.

    :param wav: audio samples (documented by the author as ``(*, ch)``)
    :return: ``dB`` of the largest absolute sample value over the whole array
    """
    magnitudes = np.abs(wav)
    peak = np.max(magnitudes)
    return dB(peak)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def rms_amplitude(wav, frame_length=512, hop_length=256):
    """Return the frame-wise RMS level of *wav* in dB.

    :param wav: audio samples (documented by the author as ``(*, ch)``);
        the array is flattened before framing
    :param frame_length: samples per frame
    :param hop_length: samples between the starts of consecutive frames
    :return: (frame_num,) RMS level of each frame, converted with ``dB``
    """
    samples = wav.flatten()
    # librosa lays the frames out as (frame_length, frame_num)
    frames = librosa.util.frame(samples, frame_length=frame_length, hop_length=hop_length)
    mean_square = np.mean(np.square(frames), axis=0)  # (frame_num,)
    rms_amp = np.sqrt(mean_square)
    return dB(rms_amp)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def mean_rms_amplitude(wav):
    """Return the mean of the frame-wise RMS levels of *wav*.

    The average is taken over the dB values returned by ``rms_amplitude``.

    :param wav: audio samples (documented by the author as ``(*, ch)``)
    :return: mean frame RMS level
    """
    frame_levels = rms_amplitude(wav)
    return np.mean(frame_levels)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def min_rms_amplitude(wav):
    """Return the lowest frame-wise RMS level of *wav*.

    :param wav: audio samples (documented by the author as ``(*, ch)``)
    :return: minimum of the values returned by ``rms_amplitude``
    """
    frame_levels = rms_amplitude(wav)
    return np.min(frame_levels)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def max_rms_amplitude(wav):
    """Return the highest frame-wise RMS level of *wav*.

    :param wav: audio samples (documented by the author as ``(*, ch)``)
    :return: maximum of the values returned by ``rms_amplitude``
    """
    frame_levels = rms_amplitude(wav)
    return np.max(frame_levels)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def zero_crossing_rate(self, audio: np.ndarray) -> np.ndarray:
    """Compute the frame-wise zero-crossing rate of *audio*.

    NOTE(review): this is a module-level function, yet its first parameter
    is ``self`` and it reads ``self.frame_length`` / ``self.hop_length``.
    It looks like a method lifted out of a class - confirm the intended
    signature. As written, callers must pass an object exposing those two
    attributes as the first argument.

    Args:
        self: Object providing ``frame_length`` and ``hop_length``.
        audio: Audio signal.

    Returns:
        First row of ``librosa.feature.zero_crossing_rate``'s result.
    """
    frame_len = self.frame_length
    hop = self.hop_length
    zcr = librosa.feature.zero_crossing_rate(
        audio, frame_length=frame_len, hop_length=hop
    )
    return zcr[0]
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def short_time_energy(self, audio: np.ndarray) -> np.ndarray:
    """Compute the short-time energy of *audio*, one value per frame.

    NOTE(review): this is a module-level function, yet its first parameter
    is ``self`` and it reads ``self.frame_length`` / ``self.hop_length``.
    It looks like a method lifted out of a class - confirm the intended
    signature. As written, callers must pass an object exposing those two
    attributes as the first argument.

    Args:
        self: Object providing ``frame_length`` and ``hop_length``.
        audio: Audio signal.

    Returns:
        Sum of squared samples of each frame.
    """
    frame_len = self.frame_length
    hop = self.hop_length

    # Slice the signal into frames, then reduce over the first axis.
    framed = librosa.util.frame(audio, frame_length=frame_len, hop_length=hop)
    return np.sum(np.square(framed), axis=0)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def dc_offset(wav):
    """Return the DC component of *wav*.

    :param wav: audio samples (documented by the author as ``(*, ch)``)
    :return: mean over all elements of ``wav``
    """
    offset = np.mean(wav)
    return offset
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
if __name__ == "__main__":
    # Manual smoke test on 16000 samples of Gaussian noise: print the shape
    # of the frame-wise RMS result.
    wav = np.random.randn(16000)
    frame_levels = rms_amplitude(wav)
    print(frame_levels.shape)
    # Other helpers that can be tried here by hand:
    # print(peak_amplitude(wav))
    # print(mean_rms_amplitude(wav))
    # print(zero_crossing_rate(wav))
    # print(short_time_energy(wav))
    # print(dc_offset(wav))
|