neverlib 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. neverlib/__init__.py +2 -2
  2. neverlib/audio_aug/__init__.py +1 -1
  3. neverlib/audio_aug/audio_aug.py +4 -5
  4. neverlib/dataAnalyze/README.md +234 -0
  5. neverlib/dataAnalyze/__init__.py +87 -0
  6. neverlib/dataAnalyze/dataset_analyzer.py +590 -0
  7. neverlib/dataAnalyze/quality_metrics.py +364 -0
  8. neverlib/dataAnalyze/rms_distrubution.py +62 -0
  9. neverlib/dataAnalyze/spectral_analysis.py +218 -0
  10. neverlib/dataAnalyze/statistics.py +406 -0
  11. neverlib/dataAnalyze/temporal_features.py +126 -0
  12. neverlib/dataAnalyze/visualization.py +468 -0
  13. neverlib/filter/AudoEQ/README.md +165 -0
  14. neverlib/filter/AudoEQ/auto_eq_de.py +361 -0
  15. neverlib/filter/AudoEQ/auto_eq_ga_advanced.py +577 -0
  16. neverlib/filter/AudoEQ/auto_eq_ga_basic.py +380 -0
  17. neverlib/filter/AudoEQ/auto_eq_spectral_direct.py +75 -0
  18. neverlib/filter/README.md +101 -0
  19. neverlib/filter/__init__.py +7 -0
  20. neverlib/filter/biquad.py +45 -0
  21. neverlib/filter/common.py +5 -6
  22. neverlib/filter/core.py +339 -0
  23. neverlib/metrics/dnsmos.py +160 -0
  24. neverlib/metrics/snr.py +177 -0
  25. neverlib/metrics/spec.py +45 -0
  26. neverlib/metrics/test_pesq.py +35 -0
  27. neverlib/metrics/time.py +68 -0
  28. neverlib/tests/test_vad.py +21 -0
  29. neverlib/utils/audio_split.py +5 -3
  30. neverlib/utils/message.py +4 -4
  31. neverlib/utils/utils.py +32 -15
  32. neverlib/vad/PreProcess.py +1 -1
  33. neverlib/vad/README.md +10 -10
  34. neverlib/vad/VAD_Energy.py +1 -1
  35. neverlib/vad/VAD_Silero.py +1 -1
  36. neverlib/vad/VAD_WebRTC.py +1 -1
  37. neverlib/vad/VAD_funasr.py +1 -1
  38. neverlib/vad/VAD_statistics.py +3 -3
  39. neverlib/vad/VAD_vadlib.py +2 -2
  40. neverlib/vad/VAD_whisper.py +1 -1
  41. neverlib/vad/__init__.py +1 -1
  42. neverlib/vad/class_get_speech.py +4 -4
  43. neverlib/vad/class_vad.py +1 -1
  44. neverlib/vad/utils.py +47 -5
  45. {neverlib-0.2.1.dist-info → neverlib-0.2.3.dist-info}/METADATA +120 -120
  46. neverlib-0.2.3.dist-info/RECORD +53 -0
  47. {neverlib-0.2.1.dist-info → neverlib-0.2.3.dist-info}/WHEEL +1 -1
  48. neverlib/Documents/vad/VAD_Energy.ipynb +0 -159
  49. neverlib/Documents/vad/VAD_Silero.ipynb +0 -305
  50. neverlib/Documents/vad/VAD_WebRTC.ipynb +0 -183
  51. neverlib/Documents/vad/VAD_funasr.ipynb +0 -179
  52. neverlib/Documents/vad/VAD_ppasr.ipynb +0 -175
  53. neverlib/Documents/vad/VAD_statistics.ipynb +0 -522
  54. neverlib/Documents/vad/VAD_vadlib.ipynb +0 -184
  55. neverlib/Documents/vad/VAD_whisper.ipynb +0 -430
  56. neverlib/utils/waveform_analyzer.py +0 -51
  57. neverlib/wav_data/000_short.wav +0 -0
  58. neverlib-0.2.1.dist-info/RECORD +0 -40
  59. {neverlib-0.2.1.dist-info → neverlib-0.2.3.dist-info}/licenses/LICENSE +0 -0
  60. {neverlib-0.2.1.dist-info → neverlib-0.2.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,406 @@
1
+ """
2
+ 统计分析工具模块
3
+ Statistics Analysis Module
4
+
5
+ 提供音频数据集统计分析功能
6
+ """
7
+
8
+ import numpy as np
9
+ import librosa
10
+ import os
11
+ from pathlib import Path
12
+ from typing import List, Dict, Tuple, Optional, Union
13
+ import matplotlib.pyplot as plt
14
+ from collections import defaultdict
15
+ import json
16
+ from .utils import rms_amplitude, dB
17
+
18
+
19
+ class AudioStatistics:
20
+ """音频统计分析类"""
21
+
22
+ def __init__(self, sr: int = 22050):
23
+ """
24
+ 初始化统计分析器
25
+
26
+ Args:
27
+ sr: 采样率
28
+ """
29
+ self.sr = sr
30
+ self.audio_data = []
31
+ self.file_paths = []
32
+ self.statistics = {}
33
+
34
+ def add_audio_file(self, file_path: str, audio_data: Optional[np.ndarray] = None):
35
+ """
36
+ 添加音频文件到分析列表
37
+
38
+ Args:
39
+ file_path: 音频文件路径
40
+ audio_data: 音频数据(如果不提供则从文件加载)
41
+ """
42
+ if audio_data is None:
43
+ try:
44
+ audio_data, _ = librosa.load(file_path, sr=self.sr)
45
+ except Exception as e:
46
+ print(f"Error loading {file_path}: {e}")
47
+ return
48
+
49
+ self.audio_data.append(audio_data)
50
+ self.file_paths.append(file_path)
51
+
52
+ def add_audio_directory(self, directory: str, extensions: List[str] = None):
53
+ """
54
+ 批量添加目录中的音频文件
55
+
56
+ Args:
57
+ directory: 音频文件目录
58
+ extensions: 支持的文件扩展名
59
+ """
60
+ if extensions is None:
61
+ extensions = ['.wav', '.mp3', '.flac', '.m4a', '.aac']
62
+
63
+ directory = Path(directory)
64
+ for ext in extensions:
65
+ for file_path in directory.glob(f'*{ext}'):
66
+ self.add_audio_file(str(file_path))
67
+
68
+ def compute_duration_statistics(self) -> Dict:
69
+ """
70
+ 计算音频时长统计
71
+
72
+ Returns:
73
+ 时长统计信息
74
+ """
75
+ durations = [len(audio) / self.sr for audio in self.audio_data]
76
+
77
+ if not durations:
78
+ return {}
79
+
80
+ stats = {
81
+ 'count': len(durations),
82
+ 'total_duration': sum(durations),
83
+ 'mean_duration': np.mean(durations),
84
+ 'median_duration': np.median(durations),
85
+ 'std_duration': np.std(durations),
86
+ 'min_duration': np.min(durations),
87
+ 'max_duration': np.max(durations),
88
+ 'percentiles': {
89
+ '25th': np.percentile(durations, 25),
90
+ '75th': np.percentile(durations, 75),
91
+ '90th': np.percentile(durations, 90),
92
+ '95th': np.percentile(durations, 95)
93
+ }
94
+ }
95
+
96
+ return stats
97
+
98
+ def compute_amplitude_statistics(self) -> Dict:
99
+ """
100
+ 计算幅度统计
101
+
102
+ Returns:
103
+ 幅度统计信息
104
+ """
105
+ all_amplitudes = []
106
+ max_amplitudes = []
107
+ rms_values = []
108
+
109
+ for audio in self.audio_data:
110
+ all_amplitudes.extend(np.abs(audio).tolist())
111
+ max_amplitudes.append(np.max(np.abs(audio)))
112
+ rms_values.append(rms_amplitude(audio))
113
+
114
+ if not all_amplitudes:
115
+ return {}
116
+
117
+ all_amplitudes = np.array(all_amplitudes)
118
+
119
+ stats = {
120
+ 'overall': {
121
+ 'mean': np.mean(all_amplitudes),
122
+ 'std': np.std(all_amplitudes),
123
+ 'min': np.min(all_amplitudes),
124
+ 'max': np.max(all_amplitudes),
125
+ 'percentiles': {
126
+ '50th': np.percentile(all_amplitudes, 50),
127
+ '90th': np.percentile(all_amplitudes, 90),
128
+ '95th': np.percentile(all_amplitudes, 95),
129
+ '99th': np.percentile(all_amplitudes, 99)
130
+ }
131
+ },
132
+ 'peak_amplitudes': {
133
+ 'mean': np.mean(max_amplitudes),
134
+ 'std': np.std(max_amplitudes),
135
+ 'min': np.min(max_amplitudes),
136
+ 'max': np.max(max_amplitudes)
137
+ },
138
+ 'rms_values': {
139
+ 'mean': np.mean(rms_values),
140
+ 'std': np.std(rms_values),
141
+ 'min': np.min(rms_values),
142
+ 'max': np.max(rms_values),
143
+ 'mean_db': dB(np.mean(rms_values)),
144
+ 'std_db': np.std([dB(rms) for rms in rms_values])
145
+ }
146
+ }
147
+
148
+ return stats
149
+
150
+ def compute_frequency_statistics(self) -> Dict:
151
+ """
152
+ 计算频域统计
153
+
154
+ Returns:
155
+ 频域统计信息
156
+ """
157
+ spectral_centroids = []
158
+ spectral_bandwidths = []
159
+ spectral_rolloffs = []
160
+
161
+ for audio in self.audio_data:
162
+ # 计算频谱特征
163
+ centroid = librosa.feature.spectral_centroid(y=audio, sr=self.sr)[0]
164
+ bandwidth = librosa.feature.spectral_bandwidth(y=audio, sr=self.sr)[0]
165
+ rolloff = librosa.feature.spectral_rolloff(y=audio, sr=self.sr)[0]
166
+
167
+ spectral_centroids.extend(centroid.tolist())
168
+ spectral_bandwidths.extend(bandwidth.tolist())
169
+ spectral_rolloffs.extend(rolloff.tolist())
170
+
171
+ if not spectral_centroids:
172
+ return {}
173
+
174
+ stats = {
175
+ 'spectral_centroid': {
176
+ 'mean': np.mean(spectral_centroids),
177
+ 'std': np.std(spectral_centroids),
178
+ 'min': np.min(spectral_centroids),
179
+ 'max': np.max(spectral_centroids)
180
+ },
181
+ 'spectral_bandwidth': {
182
+ 'mean': np.mean(spectral_bandwidths),
183
+ 'std': np.std(spectral_bandwidths),
184
+ 'min': np.min(spectral_bandwidths),
185
+ 'max': np.max(spectral_bandwidths)
186
+ },
187
+ 'spectral_rolloff': {
188
+ 'mean': np.mean(spectral_rolloffs),
189
+ 'std': np.std(spectral_rolloffs),
190
+ 'min': np.min(spectral_rolloffs),
191
+ 'max': np.max(spectral_rolloffs)
192
+ }
193
+ }
194
+
195
+ return stats
196
+
197
+ def detect_outliers(self, feature: str = 'duration', threshold: float = 2.0) -> List[Tuple[str, float]]:
198
+ """
199
+ 检测异常值
200
+
201
+ Args:
202
+ feature: 要检测的特征 ('duration', 'max_amplitude', 'rms')
203
+ threshold: Z-score阈值
204
+
205
+ Returns:
206
+ 异常文件列表 [(文件路径, 特征值)]
207
+ """
208
+ if feature == 'duration':
209
+ values = [len(audio) / self.sr for audio in self.audio_data]
210
+ elif feature == 'max_amplitude':
211
+ values = [np.max(np.abs(audio)) for audio in self.audio_data]
212
+ elif feature == 'rms':
213
+ values = [rms_amplitude(audio) for audio in self.audio_data]
214
+ else:
215
+ raise ValueError(f"Unknown feature: {feature}")
216
+
217
+ values = np.array(values)
218
+ mean_val = np.mean(values)
219
+ std_val = np.std(values)
220
+
221
+ outliers = []
222
+ for i, (path, val) in enumerate(zip(self.file_paths, values)):
223
+ z_score = abs(val - mean_val) / (std_val + 1e-10)
224
+ if z_score > threshold:
225
+ outliers.append((path, val))
226
+
227
+ return outliers
228
+
229
+ def generate_distribution_analysis(self) -> Dict:
230
+ """
231
+ 生成分布分析
232
+
233
+ Returns:
234
+ 分布分析结果
235
+ """
236
+ analysis = {
237
+ 'duration_distribution': self._analyze_distribution([len(audio) / self.sr for audio in self.audio_data]),
238
+ 'amplitude_distribution': self._analyze_distribution([np.max(np.abs(audio)) for audio in self.audio_data]),
239
+ 'rms_distribution': self._analyze_distribution([rms_amplitude(audio) for audio in self.audio_data])
240
+ }
241
+
242
+ return analysis
243
+
244
+ def _analyze_distribution(self, values: List[float]) -> Dict:
245
+ """
246
+ 分析数值分布
247
+
248
+ Args:
249
+ values: 数值列表
250
+
251
+ Returns:
252
+ 分布分析结果
253
+ """
254
+ if not values:
255
+ return {}
256
+
257
+ values = np.array(values)
258
+
259
+ # 计算偏度和峰度
260
+ mean_val = np.mean(values)
261
+ std_val = np.std(values)
262
+
263
+ # 偏度 (skewness)
264
+ skewness = np.mean(((values - mean_val) / (std_val + 1e-10)) ** 3)
265
+
266
+ # 峰度 (kurtosis)
267
+ kurtosis = np.mean(((values - mean_val) / (std_val + 1e-10)) ** 4) - 3
268
+
269
+ return {
270
+ 'mean': mean_val,
271
+ 'std': std_val,
272
+ 'skewness': skewness,
273
+ 'kurtosis': kurtosis,
274
+ 'distribution_type': self._classify_distribution(skewness, kurtosis)
275
+ }
276
+
277
+ def _classify_distribution(self, skewness: float, kurtosis: float) -> str:
278
+ """
279
+ 分类分布类型
280
+
281
+ Args:
282
+ skewness: 偏度
283
+ kurtosis: 峰度
284
+
285
+ Returns:
286
+ 分布类型描述
287
+ """
288
+ if abs(skewness) < 0.5 and abs(kurtosis) < 0.5:
289
+ return "approximately_normal"
290
+ elif skewness > 0.5:
291
+ return "right_skewed"
292
+ elif skewness < -0.5:
293
+ return "left_skewed"
294
+ elif kurtosis > 0.5:
295
+ return "heavy_tailed"
296
+ elif kurtosis < -0.5:
297
+ return "light_tailed"
298
+ else:
299
+ return "unknown"
300
+
301
+ def compute_all_statistics(self) -> Dict:
302
+ """
303
+ 计算所有统计信息
304
+
305
+ Returns:
306
+ 完整统计报告
307
+ """
308
+ self.statistics = {
309
+ 'file_count': len(self.audio_data),
310
+ 'sample_rate': self.sr,
311
+ 'duration_stats': self.compute_duration_statistics(),
312
+ 'amplitude_stats': self.compute_amplitude_statistics(),
313
+ 'frequency_stats': self.compute_frequency_statistics(),
314
+ 'distribution_analysis': self.generate_distribution_analysis(),
315
+ 'outliers': {
316
+ 'duration': self.detect_outliers('duration'),
317
+ 'max_amplitude': self.detect_outliers('max_amplitude'),
318
+ 'rms': self.detect_outliers('rms')
319
+ }
320
+ }
321
+
322
+ return self.statistics
323
+
324
+ def export_statistics(self, output_path: str):
325
+ """
326
+ 导出统计结果到JSON文件
327
+
328
+ Args:
329
+ output_path: 输出文件路径
330
+ """
331
+ # 转换numpy类型为python原生类型以便JSON序列化
332
+ def convert_numpy(obj):
333
+ if isinstance(obj, np.ndarray):
334
+ return obj.tolist()
335
+ elif isinstance(obj, np.floating):
336
+ return float(obj)
337
+ elif isinstance(obj, np.integer):
338
+ return int(obj)
339
+ elif isinstance(obj, dict):
340
+ return {key: convert_numpy(value) for key, value in obj.items()}
341
+ elif isinstance(obj, list):
342
+ return [convert_numpy(item) for item in obj]
343
+ else:
344
+ return obj
345
+
346
+ stats_json = convert_numpy(self.statistics)
347
+
348
+ with open(output_path, 'w', encoding='utf-8') as f:
349
+ json.dump(stats_json, f, indent=2, ensure_ascii=False)
350
+
351
+
352
def quick_audio_stats(file_paths: List[str], sr: int = 22050) -> Dict:
    """Load the given files and return their full statistics report.

    Args:
        file_paths: Paths of the audio files to analyse.
        sr: Sample rate handed to the analyzer.

    Returns:
        The result of ``AudioStatistics.compute_all_statistics()``.
    """
    collector = AudioStatistics(sr=sr)
    for audio_path in file_paths:
        collector.add_audio_file(audio_path)

    report = collector.compute_all_statistics()
    return report
369
+
370
+
371
def compare_datasets(dataset1_paths: List[str], dataset2_paths: List[str],
                     sr: int = 22050) -> Dict:
    """Compute statistics for two datasets and their headline differences.

    Args:
        dataset1_paths: File paths of the first dataset.
        dataset2_paths: File paths of the second dataset.
        sr: Sample rate handed to both analyzers.

    Returns:
        ``{'dataset1': ..., 'dataset2': ..., 'differences': ...}`` where each
        difference is ``dataset2 - dataset1``. A difference is ``None`` when
        either side has no value for it, e.g. an empty dataset or one whose
        files all failed to load.
    """
    def _lookup(stats, *keys):
        # Walk nested dicts; empty datasets produce {} sections, so any
        # missing level yields None instead of raising KeyError.
        node = stats
        for key in keys:
            if not isinstance(node, dict) or key not in node:
                return None
            node = node[key]
        return node

    def _difference(*keys):
        first = _lookup(stats1, *keys)
        second = _lookup(stats2, *keys)
        if first is None or second is None:
            return None
        return second - first

    analyzer1 = AudioStatistics(sr=sr)
    analyzer2 = AudioStatistics(sr=sr)

    for path in dataset1_paths:
        analyzer1.add_audio_file(path)

    for path in dataset2_paths:
        analyzer2.add_audio_file(path)

    stats1 = analyzer1.compute_all_statistics()
    stats2 = analyzer2.compute_all_statistics()

    comparison = {
        'dataset1': stats1,
        'dataset2': stats2,
        'differences': {
            'file_count_diff': stats2['file_count'] - stats1['file_count'],
            'mean_duration_diff': _difference('duration_stats', 'mean_duration'),
            'mean_rms_diff': _difference('amplitude_stats', 'rms_values', 'mean')
        }
    }

    return comparison
@@ -0,0 +1,126 @@
1
+ '''
2
+ Author: 凌逆战 | Never
3
+ Date: 2025-08-05 01:36:09
4
+ Description:
5
+ 时域特征分析模块
6
+ Temporal Features Analysis Module
7
+
8
+ 提供音频时域特征提取和分析功能
9
+ '''
10
+
11
+ import warnings
12
+ from typing import Tuple, Optional, Union
13
+ from scipy import signal
14
+ import numpy as np
15
+ import librosa
16
+ # from neverlib.utils.utils import dB
17
+
18
+
19
def dB(level):
    """Convert a linear amplitude to decibels.

    Args:
        level: Linear amplitude (scalar or array).

    Returns:
        ``20 * log10(level + 1e-10)``; the small offset keeps a zero
        amplitude finite.
    """
    offset_level = level + 1e-10
    return 20 * np.log10(offset_level)
29
+
30
+
31
def peak_amplitude(wav):
    """Return the peak absolute sample value of ``wav`` in dB.

    :param wav: waveform array; the maximum is taken over all elements
    :return: peak level in dB
    """
    magnitude = np.abs(wav)
    return dB(np.max(magnitude))
38
+
39
+
40
def rms_amplitude(wav, frame_length=512, hop_length=256):
    """Return the RMS level of every frame of ``wav`` in dB.

    :param wav: waveform array; it is flattened before framing
    :param frame_length: samples per frame
    :param hop_length: samples between consecutive frame starts
    :return: (frame_num,) array of per-frame RMS levels in dB
    """
    samples = wav.flatten()
    # librosa.util.frame returns frames as columns: (frame_length, frame_num)
    frames = librosa.util.frame(samples, frame_length=frame_length, hop_length=hop_length)
    mean_square = np.mean(np.square(frames), axis=0)  # (frame_num,)
    return dB(np.sqrt(mean_square))
49
+
50
+
51
def mean_rms_amplitude(wav):
    """Return the mean of the per-frame RMS levels (in dB) of ``wav``.

    :param wav: waveform array
    :return: average of the dB values returned by ``rms_amplitude``
    """
    frame_levels = rms_amplitude(wav)
    return np.mean(frame_levels)
57
+
58
+
59
def min_rms_amplitude(wav):
    """Return the lowest per-frame RMS level (in dB) of ``wav``.

    :param wav: waveform array
    :return: minimum of the dB values returned by ``rms_amplitude``
    """
    frame_levels = rms_amplitude(wav)
    return np.min(frame_levels)
65
+
66
+
67
def max_rms_amplitude(wav):
    """Return the highest per-frame RMS level (in dB) of ``wav``.

    :param wav: waveform array
    :return: maximum of the dB values returned by ``rms_amplitude``
    """
    frame_levels = rms_amplitude(wav)
    return np.max(frame_levels)
73
+
74
+
75
def zero_crossing_rate(self, audio: Optional[np.ndarray] = None,
                       frame_length: int = 512, hop_length: int = 256) -> np.ndarray:
    """Compute the frame-wise zero-crossing rate of a signal.

    This is a module-level function whose first parameter is named ``self``
    (it is written like a method). Two call styles are accepted:

    * ``zero_crossing_rate(wav)``: the single positional argument is the
      signal, and ``frame_length`` / ``hop_length`` come from the keyword
      arguments.
    * ``zero_crossing_rate(obj, wav)``: legacy form, where ``frame_length``
      and ``hop_length`` are read from ``obj`` when it has those attributes.

    Args:
        self: The signal (plain call) or a settings object (legacy call).
        audio: The signal in the legacy call; leave unset otherwise.
        frame_length: Samples per analysis frame. The default matches
            ``rms_amplitude`` in this module.
        hop_length: Samples between consecutive frames.

    Returns:
        1-D array with one zero-crossing rate per frame.
    """
    if audio is None:
        # Plain function call: the first positional argument is the signal.
        audio = self
    else:
        frame_length = getattr(self, "frame_length", frame_length)
        hop_length = getattr(self, "hop_length", hop_length)

    # librosa returns the rates with a leading axis of size 1; [0] drops it.
    return librosa.feature.zero_crossing_rate(
        audio, frame_length=frame_length, hop_length=hop_length
    )[0]
88
+
89
+
90
def short_time_energy(self, audio: Optional[np.ndarray] = None,
                      frame_length: int = 512, hop_length: int = 256) -> np.ndarray:
    """Compute the short-time energy (sum of squares) of each frame.

    This is a module-level function whose first parameter is named ``self``
    (it is written like a method). Two call styles are accepted:

    * ``short_time_energy(wav)``: the single positional argument is the
      signal, and ``frame_length`` / ``hop_length`` come from the keyword
      arguments.
    * ``short_time_energy(obj, wav)``: legacy form, where ``frame_length``
      and ``hop_length`` are read from ``obj`` when it has those attributes.

    Args:
        self: The signal (plain call) or a settings object (legacy call).
        audio: The signal in the legacy call; leave unset otherwise.
        frame_length: Samples per analysis frame. The default matches
            ``rms_amplitude`` in this module.
        hop_length: Samples between consecutive frames.

    Returns:
        Array with one energy value per frame.
    """
    if audio is None:
        # Plain function call: the first positional argument is the signal.
        audio = self
    else:
        frame_length = getattr(self, "frame_length", frame_length)
        hop_length = getattr(self, "hop_length", hop_length)

    # Frames are laid out as columns, so reduce over axis 0.
    frames = librosa.util.frame(
        audio, frame_length=frame_length, hop_length=hop_length
    )

    energy = np.sum(frames ** 2, axis=0)

    return energy
109
+
110
+
111
def dc_offset(wav):
    """Return the DC component of ``wav``, i.e. the mean over all samples.

    :param wav: waveform array
    :return: mean sample value
    """
    offset = np.mean(wav)
    return offset
117
+
118
+
119
if __name__ == "__main__":
    # Manual smoke test: frame 16000 samples of white noise and print the
    # shape of the per-frame RMS levels.
    wav = np.random.randn(16000)
    frame_levels = rms_amplitude(wav)
    print(frame_levels.shape)
    # Other helpers that can be checked the same way:
    # print(peak_amplitude(wav))
    # print(mean_rms_amplitude(wav))
    # print(zero_crossing_rate(wav))
    # print(short_time_energy(wav))
    # print(dc_offset(wav))