neverlib 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. neverlib/__init__.py +2 -2
  2. neverlib/audio_aug/__init__.py +1 -1
  3. neverlib/audio_aug/audio_aug.py +4 -5
  4. neverlib/dataAnalyze/README.md +234 -0
  5. neverlib/dataAnalyze/__init__.py +87 -0
  6. neverlib/dataAnalyze/dataset_analyzer.py +590 -0
  7. neverlib/dataAnalyze/quality_metrics.py +364 -0
  8. neverlib/dataAnalyze/rms_distrubution.py +62 -0
  9. neverlib/dataAnalyze/spectral_analysis.py +218 -0
  10. neverlib/dataAnalyze/statistics.py +406 -0
  11. neverlib/dataAnalyze/temporal_features.py +126 -0
  12. neverlib/dataAnalyze/visualization.py +468 -0
  13. neverlib/filter/AudoEQ/README.md +165 -0
  14. neverlib/filter/AudoEQ/auto_eq_de.py +361 -0
  15. neverlib/filter/AudoEQ/auto_eq_ga_advanced.py +577 -0
  16. neverlib/filter/AudoEQ/auto_eq_ga_basic.py +380 -0
  17. neverlib/filter/AudoEQ/auto_eq_spectral_direct.py +75 -0
  18. neverlib/filter/README.md +101 -0
  19. neverlib/filter/__init__.py +7 -0
  20. neverlib/filter/biquad.py +45 -0
  21. neverlib/filter/common.py +5 -6
  22. neverlib/filter/core.py +339 -0
  23. neverlib/metrics/dnsmos.py +160 -0
  24. neverlib/metrics/snr.py +177 -0
  25. neverlib/metrics/spec.py +45 -0
  26. neverlib/metrics/test_pesq.py +35 -0
  27. neverlib/metrics/time.py +68 -0
  28. neverlib/tests/test_vad.py +21 -0
  29. neverlib/utils/audio_split.py +2 -1
  30. neverlib/utils/message.py +4 -4
  31. neverlib/utils/utils.py +32 -15
  32. neverlib/vad/PreProcess.py +1 -1
  33. neverlib/vad/README.md +10 -10
  34. neverlib/vad/VAD_Energy.py +1 -1
  35. neverlib/vad/VAD_Silero.py +1 -1
  36. neverlib/vad/VAD_WebRTC.py +1 -1
  37. neverlib/vad/VAD_funasr.py +1 -1
  38. neverlib/vad/VAD_statistics.py +3 -3
  39. neverlib/vad/VAD_vadlib.py +2 -2
  40. neverlib/vad/VAD_whisper.py +1 -1
  41. neverlib/vad/__init__.py +1 -1
  42. neverlib/vad/class_get_speech.py +4 -4
  43. neverlib/vad/class_vad.py +1 -1
  44. neverlib/vad/utils.py +47 -5
  45. {neverlib-0.2.2.dist-info → neverlib-0.2.3.dist-info}/METADATA +120 -120
  46. neverlib-0.2.3.dist-info/RECORD +53 -0
  47. {neverlib-0.2.2.dist-info → neverlib-0.2.3.dist-info}/WHEEL +1 -1
  48. neverlib/Documents/vad/VAD_Energy.ipynb +0 -159
  49. neverlib/Documents/vad/VAD_Silero.ipynb +0 -305
  50. neverlib/Documents/vad/VAD_WebRTC.ipynb +0 -183
  51. neverlib/Documents/vad/VAD_funasr.ipynb +0 -179
  52. neverlib/Documents/vad/VAD_ppasr.ipynb +0 -175
  53. neverlib/Documents/vad/VAD_statistics.ipynb +0 -522
  54. neverlib/Documents/vad/VAD_vadlib.ipynb +0 -184
  55. neverlib/Documents/vad/VAD_whisper.ipynb +0 -430
  56. neverlib/utils/waveform_analyzer.py +0 -51
  57. neverlib/wav_data/000_short.wav +0 -0
  58. neverlib-0.2.2.dist-info/RECORD +0 -40
  59. {neverlib-0.2.2.dist-info → neverlib-0.2.3.dist-info}/licenses/LICENSE +0 -0
  60. {neverlib-0.2.2.dist-info → neverlib-0.2.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,364 @@
1
+ """
2
+ 音频质量评估模块
3
+ Audio Quality Metrics Module
4
+
5
+ 提供音频质量评估和失真度分析功能
6
+ """
7
+
8
+ import numpy as np
9
+ import librosa
10
+ from scipy import signal
11
+ from scipy.fft import fft, fftfreq
12
+ from typing import Tuple, Optional, Union, List
13
+ import warnings
14
+
15
+
16
class QualityAnalyzer:
    """Audio quality analyzer.

    Groups several signal-quality measurements (SNR, THD, dynamic range,
    frequency response, loudness statistics, spectral distortion) that share
    one sample rate.
    """

    def __init__(self, sr: int = 22050):
        """Create an analyzer.

        Args:
            sr: Sample rate in Hz. Used to map FFT bins to frequencies and to
                size the loudness analysis blocks.
        """
        self.sr = sr

    def signal_to_noise_ratio(self, signal_audio: np.ndarray,
                              noise_audio: Optional[np.ndarray] = None,
                              signal_start: Optional[int] = None,
                              signal_end: Optional[int] = None) -> float:
        """Compute the signal-to-noise ratio (SNR) in dB.

        Args:
            signal_audio: Audio containing signal plus noise.
            noise_audio: Separate noise-only recording (optional). When given,
                the SNR is the mean power of ``signal_audio`` over the mean
                power of ``noise_audio``.
            signal_start: Index where the signal starts (required when
                ``noise_audio`` is None).
            signal_end: Index where the signal ends (required when
                ``noise_audio`` is None).

        Returns:
            SNR in dB; ``inf`` when the noise power is zero and ``-inf`` when
            only the signal power is zero.

        Raises:
            ValueError: If ``noise_audio`` is None and ``signal_start`` or
                ``signal_end`` is missing.
        """
        if noise_audio is not None:
            signal_power = np.mean(signal_audio ** 2)
            noise_power = np.mean(noise_audio ** 2)
        else:
            if signal_start is None or signal_end is None:
                raise ValueError("Must provide signal_start and signal_end when noise_audio is None")

            signal_part = signal_audio[signal_start:signal_end]

            # Everything outside [signal_start, signal_end) is treated as noise.
            head = signal_audio[:signal_start] if signal_start > 0 else np.array([])
            tail = signal_audio[signal_end:] if signal_end < len(signal_audio) else np.array([])
            if len(head) > 0 or len(tail) > 0:
                noise_part = np.concatenate([head, tail])
            else:
                # No region outside the signal: fall back to the first 1000 samples.
                noise_part = signal_audio[:1000]

            signal_power = np.mean(signal_part ** 2)
            noise_power = np.mean(noise_part ** 2)

        if noise_power == 0:
            return float('inf')
        if signal_power == 0:
            # Same value log10(0) would give, without the RuntimeWarning.
            return float('-inf')

        return float(10 * np.log10(signal_power / noise_power))

    def total_harmonic_distortion(self, audio: np.ndarray,
                                  fundamental_freq: Optional[float] = None,
                                  num_harmonics: int = 5) -> float:
        """Compute total harmonic distortion (THD) as a percentage.

        The fundamental and each harmonic are taken as the largest FFT
        magnitude within +/-5% of the fundamental frequency around the
        expected bin.

        Args:
            audio: Audio signal.
            fundamental_freq: Fundamental frequency in Hz. When None, the
                positive-frequency bin with the largest magnitude is used.
            num_harmonics: Number of harmonics (starting from the 2nd) to sum.

        Returns:
            THD in percent; 0.0 when no fundamental can be located.
        """
        # Fewer than two samples leaves no positive-frequency bin to analyse.
        if len(audio) < 2:
            return 0.0

        freqs = fftfreq(len(audio), 1 / self.sr)
        magnitude = np.abs(fft(audio))

        # Keep strictly positive frequencies only (drops DC as well).
        positive_idx = freqs > 0
        freqs = freqs[positive_idx]
        magnitude = magnitude[positive_idx]
        if len(freqs) == 0:
            return 0.0

        if fundamental_freq is None:
            fundamental_freq = freqs[np.argmax(magnitude)]

        tolerance = fundamental_freq * 0.05  # 5% search window

        fundamental_idx = np.where(np.abs(freqs - fundamental_freq) < tolerance)[0]
        if len(fundamental_idx) == 0:
            return 0.0

        fundamental_power = np.max(magnitude[fundamental_idx]) ** 2
        if fundamental_power == 0:
            return 0.0

        harmonic_power = 0
        for h in range(2, num_harmonics + 2):
            harmonic_idx = np.where(np.abs(freqs - h * fundamental_freq) < tolerance)[0]
            # Harmonics with no bin in range (e.g. above Nyquist) contribute nothing.
            if len(harmonic_idx) > 0:
                harmonic_power += np.max(magnitude[harmonic_idx]) ** 2

        return float(np.sqrt(harmonic_power / fundamental_power) * 100)

    def dynamic_range(self, audio: np.ndarray, percentile_low: float = 1,
                      percentile_high: float = 99) -> float:
        """Compute the dynamic range in dB from amplitude percentiles.

        Args:
            audio: Audio signal.
            percentile_low: Percentile used as the low level.
            percentile_high: Percentile used as the high level.

        Returns:
            ``20 * log10(high / low)`` over the non-zero absolute amplitudes,
            or 0.0 when every sample is zero (or the input is empty).
        """
        amplitude = np.abs(audio)
        amplitude = amplitude[amplitude > 0]  # zeros would break the log ratio

        if len(amplitude) == 0:
            return 0.0

        low_level = np.percentile(amplitude, percentile_low)
        high_level = np.percentile(amplitude, percentile_high)

        return float(20 * np.log10(high_level / (low_level + 1e-10)))

    def frequency_response(self, audio: np.ndarray,
                           reference_audio: Optional[np.ndarray] = None) -> Tuple[np.ndarray, np.ndarray]:
        """Compute a magnitude frequency response.

        Args:
            audio: Test audio signal.
            reference_audio: Reference signal (optional). When given, the
                response is the FFT of the audio/reference cross-correlation
                divided by the FFT of the reference auto-correlation. When
                omitted, the plain magnitude spectrum of ``audio`` is returned.

        Returns:
            ``(frequencies, magnitude)`` restricted to non-negative frequencies.
        """
        if reference_audio is not None:
            cross_corr = signal.correlate(audio, reference_audio, mode='full')
            auto_corr = signal.correlate(reference_audio, reference_audio, mode='full')

            # Spectral division; the epsilon keeps empty bins from dividing by zero.
            response = fft(cross_corr) / (fft(auto_corr) + 1e-10)
            freqs = fftfreq(len(response), 1 / self.sr)
        else:
            response = fft(audio)
            freqs = fftfreq(len(audio), 1 / self.sr)

        positive_idx = freqs >= 0
        return freqs[positive_idx], np.abs(response[positive_idx])

    def loudness_range(self, audio: np.ndarray, gate_threshold: float = -70) -> dict:
        """Compute simplified loudness statistics.

        A rough approximation in the spirit of EBU R128: block loudness is the
        plain RMS level in dB (no frequency weighting is applied here), using
        400 ms blocks with a 100 ms hop.

        Args:
            audio: Audio signal.
            gate_threshold: Blocks at or below this level (dB) are discarded.

        Returns:
            Dict with ``integrated_loudness`` (mean block level),
            ``loudness_range`` (95th minus 10th percentile) and
            ``max_loudness``. When no block passes the gate the levels are
            ``-inf`` and the range is 0.
        """
        block_size = max(1, int(0.4 * self.sr))  # 400 ms block
        hop_size = max(1, int(0.1 * self.sr))    # 100 ms hop

        blocks = []
        # "+ 1" so the final full block (start == len - block_size) is included.
        for start in range(0, len(audio) - block_size + 1, hop_size):
            block = audio[start:start + block_size]
            rms = np.sqrt(np.mean(block ** 2))
            if rms > 0:
                loudness = 20 * np.log10(rms)
                if loudness > gate_threshold:
                    blocks.append(loudness)

        if len(blocks) == 0:
            return {'integrated_loudness': -float('inf'), 'loudness_range': 0, 'max_loudness': -float('inf')}

        blocks = np.array(blocks)

        return {
            'integrated_loudness': np.mean(blocks),
            'loudness_range': np.percentile(blocks, 95) - np.percentile(blocks, 10),
            'max_loudness': np.max(blocks)
        }

    def spectral_distortion(self, original: np.ndarray, processed: np.ndarray) -> float:
        """Compute spectral distortion between two signals in dB.

        Both signals are truncated to the shorter length; the result is the
        mean squared difference of their FFT magnitudes relative to the mean
        squared magnitude of the original.

        Args:
            original: Original audio.
            processed: Processed audio.

        Returns:
            Distortion in dB; ``inf`` when the original has zero spectral
            power and ``-inf`` when the magnitude spectra are identical.
        """
        min_len = min(len(original), len(processed))
        orig_spectrum = np.abs(fft(original[:min_len]))
        proc_spectrum = np.abs(fft(processed[:min_len]))

        mse = np.mean((orig_spectrum - proc_spectrum) ** 2)
        orig_power = np.mean(orig_spectrum ** 2)

        if orig_power == 0:
            return float('inf')
        if mse == 0:
            # Same value log10(0) would give, without the RuntimeWarning.
            return float('-inf')

        return float(10 * np.log10(mse / orig_power))
260
+
261
+
262
def comprehensive_quality_assessment(audio: np.ndarray, sr: int = 22050,
                                     reference: Optional[np.ndarray] = None) -> dict:
    """Run a combined quality assessment on an audio signal.

    Each optional metric is computed on a best-effort basis: if it raises,
    its entry is set to None instead of aborting the whole assessment.

    Args:
        audio: Audio to evaluate.
        sr: Sample rate in Hz.
        reference: Reference audio (optional). When given, ``snr`` and
            ``spectral_distortion`` are added to the result.

    Returns:
        Dict with ``dynamic_range``, ``loudness_stats``, ``thd``,
        ``frequency_response`` and, when a reference is supplied, ``snr``
        and ``spectral_distortion``.
    """
    analyzer = QualityAnalyzer(sr=sr)

    results = {
        'dynamic_range': analyzer.dynamic_range(audio),
        'loudness_stats': analyzer.loudness_range(audio),
    }

    # `except Exception` (not a bare except) so KeyboardInterrupt/SystemExit
    # still propagate.
    try:
        results['thd'] = analyzer.total_harmonic_distortion(audio)
    except Exception:
        results['thd'] = None

    if reference is not None:
        # Metrics are guarded independently so one failure does not erase the other.
        # NOTE(review): `reference` is passed as the noise argument here, so this
        # is the audio/reference power ratio - confirm that is the intended SNR.
        try:
            results['snr'] = analyzer.signal_to_noise_ratio(audio, reference)
        except Exception:
            results['snr'] = None
        try:
            results['spectral_distortion'] = analyzer.spectral_distortion(reference, audio)
        except Exception:
            results['spectral_distortion'] = None

    try:
        freqs, response = analyzer.frequency_response(audio, reference)
        results['frequency_response'] = {
            'frequencies': freqs,
            'magnitude': response
        }
    except Exception:
        results['frequency_response'] = None

    return results
308
+
309
+
310
def audio_health_check(audio: np.ndarray, sr: int = 22050) -> dict:
    """Run basic sanity checks on an audio signal.

    The amplitude thresholds (0.99 clipping, 0.01 low level, 0.01 DC offset)
    presumably assume floating-point audio normalised to [-1, 1] - confirm
    against callers before using with integer PCM.

    Args:
        audio: Audio signal.
        sr: Sample rate in Hz (used for the reported duration).

    Returns:
        Dict with ``issues`` (list of str), ``warnings`` (list of str) and
        ``stats`` (max/min/mean absolute amplitude and duration in seconds).
        Empty input yields zeroed stats and a single 'Empty audio signal'
        issue.
    """
    health_report = {
        'issues': [],
        'warnings': [],
        'stats': {}
    }

    # np.max/np.min raise on zero-size arrays, so report empty input explicitly.
    if np.size(audio) == 0:
        health_report['stats'] = {
            'max_amplitude': 0.0,
            'min_amplitude': 0.0,
            'mean_amplitude': 0.0,
            'duration': 0.0
        }
        health_report['issues'].append('Empty audio signal')
        return health_report

    # Basic amplitude statistics
    abs_audio = np.abs(audio)
    max_amplitude = np.max(abs_audio)
    min_amplitude = np.min(abs_audio)
    mean_amplitude = np.mean(abs_audio)

    health_report['stats'] = {
        'max_amplitude': max_amplitude,
        'min_amplitude': min_amplitude,
        'mean_amplitude': mean_amplitude,
        'duration': len(audio) / sr
    }

    # Clipping
    if max_amplitude >= 0.99:
        health_report['issues'].append('Potential clipping detected')

    # Very low level
    if max_amplitude < 0.01:
        health_report['warnings'].append('Very low signal level')

    # Silence
    if mean_amplitude < 1e-6:
        health_report['issues'].append('Signal appears to be silent')

    # DC offset
    dc_offset = np.mean(audio)
    if abs(dc_offset) > 0.01:
        health_report['warnings'].append(f'DC offset detected: {dc_offset:.4f}')

    # Dynamic range
    analyzer = QualityAnalyzer(sr=sr)
    dynamic_range = analyzer.dynamic_range(audio)
    if dynamic_range < 6:
        health_report['warnings'].append('Low dynamic range')
    elif dynamic_range > 60:
        health_report['warnings'].append('Very high dynamic range - check for noise')

    return health_report
@@ -0,0 +1,62 @@
1
+ '''
2
+ Author: 凌逆战 | Never
3
+ Date: 2025-03-26 22:13:22
4
+ Description:
5
+ '''
6
+ # -*- coding:utf-8 -*-
7
+ # Author:凌逆战 | Never
8
+ # Date: 2025/3/2
9
+ """
10
+ 统计音频语音段rms值分布
11
+ """
12
+ import sys
13
+ sys.path.append("../../../")
14
+ import torch
15
+ import soundfile as sf
16
+ from neverlib.utils import get_path_list
17
+ from neverlib.filter import HPFilter
18
+ from neverlib.audio_aug import volume_norm
19
+ from neverlib.dataAnalyze.utils import rms_amplitude
20
+ from joblib import Parallel, delayed
21
+ import matplotlib.pyplot as plt
22
+ import numpy as np
23
+ import librosa
24
+ import os
25
+ from utils.train_utils import from_path_get_vadpoint
26
+
27
+
28
+
29
+
30
def get_rms_vad(wav_path):
    """Return the mean RMS amplitude of the voiced segment of one wav file.

    The file is read with channels kept on the second axis, its sample rate
    is checked against the module-level ``sr``, and the segment boundaries
    come from ``from_path_get_vadpoint(wav_path)``.

    Args:
        wav_path: Path of the wav file to analyse.

    Returns:
        ``rms_amplitude(...)`` of the voiced slice, averaged with ``.mean()``.
    """
    samples, wav_sr = sf.read(wav_path, always_2d=True)  # (n_samples, n_channels)
    assert wav_sr == sr, f"期望采样率为{sr}, 但是为{wav_sr}, 文件名: {wav_path}"
    seg_begin, seg_end = from_path_get_vadpoint(wav_path)
    voiced = samples[seg_begin:seg_end]
    return rms_amplitude(voiced).mean()
40
+
41
+
42
# Expected sample rate (Hz); read by get_rms_vad when validating each file.
sr = 16000
wav_dir_list = [
    "/data/never/Dataset/kws_data/Command_Word/Crowdsourcing/en_kws2/train/RealPerson",
    "/data/never/Dataset/kws_data/Command_Word/Crowdsourcing/en_kws2/val/RealPerson",
    "/data/never/Dataset/kws_data/Command_Word/Crowdsourcing/en_kws2/test/RealPerson",
]

# Collect every wav file under the listed directories.
wav_path_list = []
for wav_dir in wav_dir_list:
    wav_path_list.extend(get_path_list(wav_dir, end="*.wav"))

# Compute the per-file RMS in parallel.
rms_list = Parallel(n_jobs=64)(delayed(get_rms_vad)(wav_path) for wav_path in wav_path_list)

# Plot the histogram of RMS values.
plt.hist(rms_list, bins=100, edgecolor='black')
plt.title("RMS Distribution")
plt.xlabel("RMS (dB)")
plt.ylabel("number")
plt.grid(True)
plt.tight_layout()
# savefig does not create missing directories, so make sure the target exists.
os.makedirs("./png_dist", exist_ok=True)
plt.savefig("./png_dist/rms_distribution.png")
62
+
@@ -0,0 +1,218 @@
1
+ """
2
+ 频域分析模块
3
+ Spectral Analysis Module
4
+
5
+ 提供音频频域特征提取和分析功能
6
+ """
7
+
8
+ import numpy as np
9
+ import librosa
10
+ import scipy.signal
11
+ from scipy.fft import fft, fftfreq
12
+ from typing import Tuple, Optional, Union
13
+ import warnings
14
+
15
+
16
class SpectralAnalyzer:
    """Spectral analyzer.

    Thin wrapper around librosa/scipy spectral feature extraction that keeps
    the sample rate, FFT size and hop length in one place so that every
    feature is computed with the same framing.
    """

    def __init__(self, sr: int = 22050, n_fft: int = 2048, hop_length: int = 512):
        """Create an analyzer.

        Args:
            sr: Sample rate in Hz.
            n_fft: FFT window size used for all frame-based features.
            hop_length: Hop length between frames.
        """
        self.sr = sr
        self.n_fft = n_fft
        self.hop_length = hop_length

    def compute_stft(self, audio: np.ndarray) -> np.ndarray:
        """Compute the short-time Fourier transform.

        Args:
            audio: Audio signal.

        Returns:
            The result of ``librosa.stft`` for the configured framing.
        """
        return librosa.stft(audio, n_fft=self.n_fft, hop_length=self.hop_length)

    def compute_magnitude_spectrum(self, audio: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """Compute the magnitude spectrum of the whole signal.

        Args:
            audio: Audio signal.

        Returns:
            ``(frequencies, magnitude)`` restricted to non-negative frequencies.
        """
        magnitude = np.abs(fft(audio))
        freqs = fftfreq(len(audio), 1 / self.sr)

        positive_freq_idx = freqs >= 0
        return freqs[positive_freq_idx], magnitude[positive_freq_idx]

    # The frame-based features below all pass n_fft=self.n_fft; previously the
    # configured FFT size was silently ignored in favour of librosa's default.

    def spectral_centroid(self, audio: np.ndarray) -> np.ndarray:
        """Compute the spectral centroid.

        Args:
            audio: Audio signal.

        Returns:
            First row of ``librosa.feature.spectral_centroid``.
        """
        return librosa.feature.spectral_centroid(
            y=audio, sr=self.sr, n_fft=self.n_fft, hop_length=self.hop_length
        )[0]

    def spectral_rolloff(self, audio: np.ndarray, roll_percent: float = 0.85) -> np.ndarray:
        """Compute the spectral roll-off.

        Args:
            audio: Audio signal.
            roll_percent: Roll-off fraction passed to librosa.

        Returns:
            First row of ``librosa.feature.spectral_rolloff``.
        """
        return librosa.feature.spectral_rolloff(
            y=audio, sr=self.sr, n_fft=self.n_fft, hop_length=self.hop_length,
            roll_percent=roll_percent
        )[0]

    def spectral_flatness(self, audio: np.ndarray) -> np.ndarray:
        """Compute the spectral flatness.

        Args:
            audio: Audio signal.

        Returns:
            First row of ``librosa.feature.spectral_flatness``.
        """
        return librosa.feature.spectral_flatness(
            y=audio, n_fft=self.n_fft, hop_length=self.hop_length
        )[0]

    def spectral_contrast(self, audio: np.ndarray, n_bands: int = 6) -> np.ndarray:
        """Compute the spectral contrast.

        Args:
            audio: Audio signal.
            n_bands: Number of frequency bands.

        Returns:
            The matrix returned by ``librosa.feature.spectral_contrast``.
        """
        return librosa.feature.spectral_contrast(
            y=audio, sr=self.sr, n_fft=self.n_fft, hop_length=self.hop_length,
            n_bands=n_bands
        )

    def mfcc_features(self, audio: np.ndarray, n_mfcc: int = 13) -> np.ndarray:
        """Extract MFCC features.

        Args:
            audio: Audio signal.
            n_mfcc: Number of MFCC coefficients.

        Returns:
            The matrix returned by ``librosa.feature.mfcc``.
        """
        return librosa.feature.mfcc(
            y=audio, sr=self.sr, n_mfcc=n_mfcc, n_fft=self.n_fft,
            hop_length=self.hop_length
        )

    def mel_spectrogram(self, audio: np.ndarray, n_mels: int = 128) -> np.ndarray:
        """Compute the mel spectrogram.

        Args:
            audio: Audio signal.
            n_mels: Number of mel filter banks.

        Returns:
            The matrix returned by ``librosa.feature.melspectrogram``.
        """
        return librosa.feature.melspectrogram(
            y=audio, sr=self.sr, n_mels=n_mels, n_fft=self.n_fft,
            hop_length=self.hop_length
        )

    def chroma_features(self, audio: np.ndarray) -> np.ndarray:
        """Extract chroma features.

        Args:
            audio: Audio signal.

        Returns:
            The matrix returned by ``librosa.feature.chroma_stft``.
        """
        return librosa.feature.chroma_stft(
            y=audio, sr=self.sr, n_fft=self.n_fft, hop_length=self.hop_length
        )
163
+
164
+
165
def compute_spectral_features(audio: np.ndarray, sr: int = 22050) -> dict:
    """Compute the full set of frame-based spectral features.

    Args:
        audio: Audio signal.
        sr: Sample rate in Hz.

    Returns:
        Dict mapping each feature name (``spectral_centroid``,
        ``spectral_rolloff``, ``spectral_flatness``, ``spectral_contrast``,
        ``mfcc``, ``mel_spectrogram``, ``chroma``) to the value returned by
        the matching ``SpectralAnalyzer`` method with its default arguments.
    """
    spectral = SpectralAnalyzer(sr=sr)

    # (result key, extractor) pairs, evaluated in this order.
    extractors = (
        ('spectral_centroid', spectral.spectral_centroid),
        ('spectral_rolloff', spectral.spectral_rolloff),
        ('spectral_flatness', spectral.spectral_flatness),
        ('spectral_contrast', spectral.spectral_contrast),
        ('mfcc', spectral.mfcc_features),
        ('mel_spectrogram', spectral.mel_spectrogram),
        ('chroma', spectral.chroma_features),
    )

    return {key: extract(audio) for key, extract in extractors}
189
+
190
+
191
def frequency_domain_stats(audio: np.ndarray, sr: int = 22050) -> dict:
    """Compute summary statistics of the power spectrum.

    Args:
        audio: Audio signal.
        sr: Sample rate in Hz.

    Returns:
        Dict with ``mean_frequency`` and ``std_frequency`` (power-weighted),
        ``peak_frequency`` (bin with the largest magnitude), ``bandwidth``
        (span between the first and last bins above half the peak power),
        ``spectral_energy`` (sum of power) and ``spectral_entropy`` (base-2
        entropy of the normalised power). Empty or all-zero input returns
        every statistic as 0.0.
    """
    # Returned for input with no spectral energy, where the weighted
    # averages below would otherwise divide by zero.
    silent_stats = {
        'mean_frequency': 0.0,
        'std_frequency': 0.0,
        'peak_frequency': 0.0,
        'bandwidth': 0.0,
        'spectral_energy': 0.0,
        'spectral_entropy': 0.0
    }
    if np.size(audio) == 0:
        return silent_stats

    analyzer = SpectralAnalyzer(sr=sr)
    freqs, magnitude = analyzer.compute_magnitude_spectrum(audio)

    power = magnitude ** 2
    total_power = np.sum(power)
    if total_power == 0:
        return silent_stats

    # Hoisted: each of these was recomputed several times inline.
    mean_frequency = np.average(freqs, weights=power)
    half_power_freqs = freqs[power > 0.5 * np.max(power)]
    normalized_power = power / total_power

    stats = {
        'mean_frequency': mean_frequency,
        'std_frequency': np.sqrt(np.average((freqs - mean_frequency) ** 2, weights=power)),
        'peak_frequency': freqs[np.argmax(magnitude)],
        'bandwidth': half_power_freqs[-1] - half_power_freqs[0],
        'spectral_energy': total_power,
        # The epsilon keeps log2 finite for empty bins.
        'spectral_entropy': -np.sum(normalized_power * np.log2(normalized_power + 1e-10))
    }

    return stats