neverlib 0.2.6__py3-none-any.whl → 0.2.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- neverlib/.claude/settings.local.json +9 -0
- neverlib/Docs/audio_aug/test_volume.ipynb +416 -0
- neverlib/Docs/audio_aug_test/test_volume.ipynb +289 -0
- neverlib/Docs/filter/biquad.ipynb +129 -0
- neverlib/Docs/filter/filter_family.ipynb +450 -0
- neverlib/Docs/filter/highpass.ipynb +139 -0
- neverlib/Docs/filter/scipy_filter_family.ipynb +110 -0
- neverlib/Docs/vad/VAD_Energy.ipynb +167 -0
- neverlib/Docs/vad/VAD_Silero.ipynb +325 -0
- neverlib/Docs/vad/VAD_WebRTC.ipynb +189 -0
- neverlib/Docs/vad/VAD_funasr.ipynb +192 -0
- neverlib/Docs/vad/VAD_rvADfast.ipynb +162 -0
- neverlib/Docs/vad/VAD_statistics.ipynb +532 -0
- neverlib/Docs/vad/VAD_tenVAD.ipynb +292 -0
- neverlib/Docs/vad/VAD_vadlib.ipynb +168 -0
- neverlib/Docs/vad/VAD_whisper.ipynb +404 -0
- neverlib/QA/gen_init.py +117 -0
- neverlib/QA/get_fun.py +19 -0
- neverlib/__init__.py +21 -4
- neverlib/audio_aug/HarmonicDistortion.py +19 -13
- neverlib/audio_aug/__init__.py +30 -12
- neverlib/audio_aug/audio_aug.py +19 -14
- neverlib/audio_aug/clip_aug.py +15 -18
- neverlib/audio_aug/coder_aug.py +44 -24
- neverlib/audio_aug/coder_aug2.py +54 -37
- neverlib/audio_aug/loss_packet_aug.py +7 -7
- neverlib/audio_aug/quant_aug.py +19 -17
- neverlib/data/000_short_enhance.wav +0 -0
- neverlib/data/3956_speech.wav +0 -0
- neverlib/data/3956_sweep.wav +0 -0
- neverlib/data/vad_example.wav +0 -0
- neverlib/data/white.wav +0 -0
- neverlib/data/white_EQ.wav +0 -0
- neverlib/data/white_matched.wav +0 -0
- neverlib/data_analyze/__init__.py +25 -20
- neverlib/data_analyze/dataset_analyzer.py +109 -114
- neverlib/data_analyze/quality_metrics.py +87 -89
- neverlib/data_analyze/rms_distrubution.py +23 -42
- neverlib/data_analyze/spectral_analysis.py +43 -46
- neverlib/data_analyze/statistics.py +76 -76
- neverlib/data_analyze/temporal_features.py +15 -6
- neverlib/data_analyze/visualization.py +208 -144
- neverlib/filter/__init__.py +17 -20
- neverlib/filter/auto_eq/__init__.py +18 -35
- neverlib/filter/auto_eq/de_eq.py +0 -2
- neverlib/filter/common.py +24 -5
- neverlib/metrics/DNSMOS/bak_ovr.onnx +0 -0
- neverlib/metrics/DNSMOS/model_v8.onnx +0 -0
- neverlib/metrics/DNSMOS/sig.onnx +0 -0
- neverlib/metrics/DNSMOS/sig_bak_ovr.onnx +0 -0
- neverlib/metrics/__init__.py +23 -0
- neverlib/metrics/dnsmos.py +4 -15
- neverlib/metrics/pDNSMOS/sig_bak_ovr.onnx +0 -0
- neverlib/metrics/pesq_c/PESQ +0 -0
- neverlib/metrics/pesq_c/dsp.c +553 -0
- neverlib/metrics/pesq_c/dsp.h +138 -0
- neverlib/metrics/pesq_c/pesq.h +294 -0
- neverlib/metrics/pesq_c/pesqdsp.c +1047 -0
- neverlib/metrics/pesq_c/pesqio.c +392 -0
- neverlib/metrics/pesq_c/pesqmain.c +610 -0
- neverlib/metrics/pesq_c/pesqmod.c +1417 -0
- neverlib/metrics/pesq_c/pesqpar.h +297 -0
- neverlib/metrics/snr.py +5 -1
- neverlib/metrics/spec.py +31 -21
- neverlib/metrics/test_pesq.py +0 -4
- neverlib/tests/test_imports.py +17 -0
- neverlib/utils/__init__.py +26 -15
- neverlib/utils/audio_split.py +5 -1
- neverlib/utils/checkGPU.py +17 -9
- neverlib/utils/lazy_expose.py +29 -0
- neverlib/utils/utils.py +40 -12
- neverlib/vad/__init__.py +33 -25
- neverlib/vad/class_get_speech.py +1 -1
- neverlib/vad/class_vad.py +3 -3
- neverlib/vad/img.png +0 -0
- {neverlib-0.2.6.dist-info → neverlib-0.2.7.dist-info}/METADATA +1 -1
- {neverlib-0.2.6.dist-info → neverlib-0.2.7.dist-info}/RECORD +80 -37
- {neverlib-0.2.6.dist-info → neverlib-0.2.7.dist-info}/WHEEL +0 -0
- {neverlib-0.2.6.dist-info → neverlib-0.2.7.dist-info}/licenses/LICENSE +0 -0
- {neverlib-0.2.6.dist-info → neverlib-0.2.7.dist-info}/top_level.txt +0 -0
|
@@ -18,37 +18,40 @@ from scipy.signal import spectrogram
|
|
|
18
18
|
|
|
19
19
|
class AudioVisualizer:
|
|
20
20
|
"""音频可视化器类"""
|
|
21
|
-
|
|
21
|
+
|
|
22
22
|
def __init__(self, sr: int = 22050, figsize: Tuple[int, int] = (12, 8)):
|
|
23
23
|
"""
|
|
24
24
|
初始化可视化器
|
|
25
|
-
|
|
25
|
+
|
|
26
26
|
Args:
|
|
27
27
|
sr: 采样率
|
|
28
28
|
figsize: 图形大小
|
|
29
29
|
"""
|
|
30
30
|
self.sr = sr
|
|
31
31
|
self.figsize = figsize
|
|
32
|
-
|
|
32
|
+
|
|
33
33
|
# 设置中文字体支持
|
|
34
34
|
plt.rcParams['font.sans-serif'] = ['SimHei', 'DejaVu Sans']
|
|
35
35
|
plt.rcParams['axes.unicode_minus'] = False
|
|
36
|
-
|
|
36
|
+
|
|
37
37
|
# 设置样式
|
|
38
38
|
plt.style.use('default')
|
|
39
39
|
sns.set_palette("husl")
|
|
40
|
-
|
|
41
|
-
def plot_waveform(self,
|
|
42
|
-
|
|
40
|
+
|
|
41
|
+
def plot_waveform(self,
|
|
42
|
+
audio: np.ndarray,
|
|
43
|
+
title: str = "音频波形图",
|
|
44
|
+
show_time: bool = True,
|
|
45
|
+
ax: Optional[plt.Axes] = None) -> plt.Figure:
|
|
43
46
|
"""
|
|
44
47
|
绘制音频波形图
|
|
45
|
-
|
|
48
|
+
|
|
46
49
|
Args:
|
|
47
50
|
audio: 音频信号
|
|
48
51
|
title: 图标题
|
|
49
52
|
show_time: 是否显示时间轴
|
|
50
53
|
ax: matplotlib轴对象
|
|
51
|
-
|
|
54
|
+
|
|
52
55
|
Returns:
|
|
53
56
|
图形对象
|
|
54
57
|
"""
|
|
@@ -56,7 +59,7 @@ class AudioVisualizer:
|
|
|
56
59
|
fig, ax = plt.subplots(figsize=self.figsize)
|
|
57
60
|
else:
|
|
58
61
|
fig = ax.figure
|
|
59
|
-
|
|
62
|
+
|
|
60
63
|
if show_time:
|
|
61
64
|
time_axis = np.linspace(0, len(audio) / self.sr, len(audio))
|
|
62
65
|
ax.plot(time_axis, audio, linewidth=0.5, alpha=0.8)
|
|
@@ -64,30 +67,33 @@ class AudioVisualizer:
|
|
|
64
67
|
else:
|
|
65
68
|
ax.plot(audio, linewidth=0.5, alpha=0.8)
|
|
66
69
|
ax.set_xlabel('样本点')
|
|
67
|
-
|
|
70
|
+
|
|
68
71
|
ax.set_ylabel('幅度')
|
|
69
72
|
ax.set_title(title)
|
|
70
73
|
ax.grid(True, alpha=0.3)
|
|
71
|
-
|
|
74
|
+
|
|
72
75
|
# 添加零线
|
|
73
76
|
ax.axhline(y=0, color='red', linestyle='--', alpha=0.5)
|
|
74
|
-
|
|
77
|
+
|
|
75
78
|
plt.tight_layout()
|
|
76
79
|
return fig
|
|
77
|
-
|
|
78
|
-
def plot_spectrogram(self,
|
|
79
|
-
|
|
80
|
-
|
|
80
|
+
|
|
81
|
+
def plot_spectrogram(self,
|
|
82
|
+
audio: np.ndarray,
|
|
83
|
+
title: str = "频谱图",
|
|
84
|
+
n_fft: int = 2048,
|
|
85
|
+
hop_length: int = 512,
|
|
86
|
+
ax: Optional[plt.Axes] = None) -> plt.Figure:
|
|
81
87
|
"""
|
|
82
88
|
绘制频谱图
|
|
83
|
-
|
|
89
|
+
|
|
84
90
|
Args:
|
|
85
91
|
audio: 音频信号
|
|
86
92
|
title: 图标题
|
|
87
93
|
n_fft: FFT窗口大小
|
|
88
94
|
hop_length: 跳跃长度
|
|
89
95
|
ax: matplotlib轴对象
|
|
90
|
-
|
|
96
|
+
|
|
91
97
|
Returns:
|
|
92
98
|
图形对象
|
|
93
99
|
"""
|
|
@@ -95,37 +101,44 @@ class AudioVisualizer:
|
|
|
95
101
|
fig, ax = plt.subplots(figsize=self.figsize)
|
|
96
102
|
else:
|
|
97
103
|
fig = ax.figure
|
|
98
|
-
|
|
104
|
+
|
|
99
105
|
# 计算频谱图
|
|
100
106
|
D = librosa.stft(audio, n_fft=n_fft, hop_length=hop_length)
|
|
101
107
|
S_db = librosa.amplitude_to_db(np.abs(D), ref=np.max)
|
|
102
|
-
|
|
108
|
+
|
|
103
109
|
# 绘制
|
|
104
|
-
img = librosa.display.specshow(S_db,
|
|
105
|
-
|
|
106
|
-
|
|
110
|
+
img = librosa.display.specshow(S_db,
|
|
111
|
+
sr=self.sr,
|
|
112
|
+
hop_length=hop_length,
|
|
113
|
+
x_axis='time',
|
|
114
|
+
y_axis='hz',
|
|
115
|
+
ax=ax)
|
|
116
|
+
|
|
107
117
|
ax.set_title(title)
|
|
108
118
|
ax.set_xlabel('时间 (s)')
|
|
109
119
|
ax.set_ylabel('频率 (Hz)')
|
|
110
|
-
|
|
120
|
+
|
|
111
121
|
# 添加颜色条
|
|
112
122
|
cbar = plt.colorbar(img, ax=ax, format='%+2.0f dB')
|
|
113
123
|
cbar.set_label('幅度 (dB)')
|
|
114
|
-
|
|
124
|
+
|
|
115
125
|
plt.tight_layout()
|
|
116
126
|
return fig
|
|
117
|
-
|
|
118
|
-
def plot_mel_spectrogram(self,
|
|
119
|
-
|
|
127
|
+
|
|
128
|
+
def plot_mel_spectrogram(self,
|
|
129
|
+
audio: np.ndarray,
|
|
130
|
+
title: str = "梅尔频谱图",
|
|
131
|
+
n_mels: int = 128,
|
|
132
|
+
ax: Optional[plt.Axes] = None) -> plt.Figure:
|
|
120
133
|
"""
|
|
121
134
|
绘制梅尔频谱图
|
|
122
|
-
|
|
135
|
+
|
|
123
136
|
Args:
|
|
124
137
|
audio: 音频信号
|
|
125
138
|
title: 图标题
|
|
126
139
|
n_mels: 梅尔滤波器数量
|
|
127
140
|
ax: matplotlib轴对象
|
|
128
|
-
|
|
141
|
+
|
|
129
142
|
Returns:
|
|
130
143
|
图形对象
|
|
131
144
|
"""
|
|
@@ -133,37 +146,43 @@ class AudioVisualizer:
|
|
|
133
146
|
fig, ax = plt.subplots(figsize=self.figsize)
|
|
134
147
|
else:
|
|
135
148
|
fig = ax.figure
|
|
136
|
-
|
|
149
|
+
|
|
137
150
|
# 计算梅尔频谱图
|
|
138
151
|
S = librosa.feature.melspectrogram(y=audio, sr=self.sr, n_mels=n_mels)
|
|
139
152
|
S_db = librosa.power_to_db(S, ref=np.max)
|
|
140
|
-
|
|
153
|
+
|
|
141
154
|
# 绘制
|
|
142
|
-
img = librosa.display.specshow(S_db,
|
|
143
|
-
|
|
144
|
-
|
|
155
|
+
img = librosa.display.specshow(S_db,
|
|
156
|
+
sr=self.sr,
|
|
157
|
+
x_axis='time',
|
|
158
|
+
y_axis='mel',
|
|
159
|
+
ax=ax)
|
|
160
|
+
|
|
145
161
|
ax.set_title(title)
|
|
146
162
|
ax.set_xlabel('时间 (s)')
|
|
147
163
|
ax.set_ylabel('梅尔频率')
|
|
148
|
-
|
|
164
|
+
|
|
149
165
|
# 添加颜色条
|
|
150
166
|
cbar = plt.colorbar(img, ax=ax, format='%+2.0f dB')
|
|
151
167
|
cbar.set_label('功率 (dB)')
|
|
152
|
-
|
|
168
|
+
|
|
153
169
|
plt.tight_layout()
|
|
154
170
|
return fig
|
|
155
|
-
|
|
156
|
-
def plot_spectrum(self,
|
|
157
|
-
|
|
171
|
+
|
|
172
|
+
def plot_spectrum(self,
|
|
173
|
+
audio: np.ndarray,
|
|
174
|
+
title: str = "频谱",
|
|
175
|
+
log_scale: bool = True,
|
|
176
|
+
ax: Optional[plt.Axes] = None) -> plt.Figure:
|
|
158
177
|
"""
|
|
159
178
|
绘制频谱
|
|
160
|
-
|
|
179
|
+
|
|
161
180
|
Args:
|
|
162
181
|
audio: 音频信号
|
|
163
182
|
title: 图标题
|
|
164
183
|
log_scale: 是否使用对数刻度
|
|
165
184
|
ax: matplotlib轴对象
|
|
166
|
-
|
|
185
|
+
|
|
167
186
|
Returns:
|
|
168
187
|
图形对象
|
|
169
188
|
"""
|
|
@@ -171,17 +190,17 @@ class AudioVisualizer:
|
|
|
171
190
|
fig, ax = plt.subplots(figsize=self.figsize)
|
|
172
191
|
else:
|
|
173
192
|
fig = ax.figure
|
|
174
|
-
|
|
193
|
+
|
|
175
194
|
# 计算FFT
|
|
176
195
|
fft_data = np.fft.fft(audio)
|
|
177
196
|
magnitude = np.abs(fft_data)
|
|
178
|
-
freqs = np.fft.fftfreq(len(audio), 1/self.sr)
|
|
179
|
-
|
|
197
|
+
freqs = np.fft.fftfreq(len(audio), 1 / self.sr)
|
|
198
|
+
|
|
180
199
|
# 只取正频率部分
|
|
181
200
|
positive_idx = freqs >= 0
|
|
182
201
|
freqs = freqs[positive_idx]
|
|
183
202
|
magnitude = magnitude[positive_idx]
|
|
184
|
-
|
|
203
|
+
|
|
185
204
|
if log_scale:
|
|
186
205
|
magnitude_db = 20 * np.log10(magnitude + 1e-10)
|
|
187
206
|
ax.plot(freqs, magnitude_db)
|
|
@@ -189,280 +208,325 @@ class AudioVisualizer:
|
|
|
189
208
|
else:
|
|
190
209
|
ax.plot(freqs, magnitude)
|
|
191
210
|
ax.set_ylabel('幅度')
|
|
192
|
-
|
|
211
|
+
|
|
193
212
|
ax.set_xlabel('频率 (Hz)')
|
|
194
213
|
ax.set_title(title)
|
|
195
214
|
ax.grid(True, alpha=0.3)
|
|
196
|
-
|
|
215
|
+
|
|
197
216
|
plt.tight_layout()
|
|
198
217
|
return fig
|
|
199
|
-
|
|
200
|
-
def plot_features_comparison(self,
|
|
201
|
-
|
|
218
|
+
|
|
219
|
+
def plot_features_comparison(self,
|
|
220
|
+
features_dict: Dict[str, np.ndarray],
|
|
221
|
+
title: str = "特征对比") -> plt.Figure:
|
|
202
222
|
"""
|
|
203
223
|
绘制多个特征的对比图
|
|
204
|
-
|
|
224
|
+
|
|
205
225
|
Args:
|
|
206
226
|
features_dict: 特征字典 {特征名: 特征值数组}
|
|
207
227
|
title: 图标题
|
|
208
|
-
|
|
228
|
+
|
|
209
229
|
Returns:
|
|
210
230
|
图形对象
|
|
211
231
|
"""
|
|
212
232
|
n_features = len(features_dict)
|
|
213
|
-
fig, axes = plt.subplots(n_features,
|
|
214
|
-
|
|
233
|
+
fig, axes = plt.subplots(n_features,
|
|
234
|
+
1,
|
|
235
|
+
figsize=(self.figsize[0],
|
|
236
|
+
self.figsize[1] * n_features / 2))
|
|
237
|
+
|
|
215
238
|
if n_features == 1:
|
|
216
239
|
axes = [axes]
|
|
217
|
-
|
|
218
|
-
for i, (feature_name,
|
|
240
|
+
|
|
241
|
+
for i, (feature_name,
|
|
242
|
+
feature_values) in enumerate(features_dict.items()):
|
|
219
243
|
if len(feature_values.shape) == 1:
|
|
220
244
|
# 一维特征
|
|
221
|
-
time_axis = np.linspace(0,
|
|
245
|
+
time_axis = np.linspace(0,
|
|
246
|
+
len(feature_values) / (self.sr / 512),
|
|
247
|
+
len(feature_values))
|
|
222
248
|
axes[i].plot(time_axis, feature_values)
|
|
223
249
|
axes[i].set_ylabel(feature_name)
|
|
224
250
|
else:
|
|
225
251
|
# 二维特征(如MFCC)
|
|
226
|
-
img = axes[i].imshow(feature_values,
|
|
252
|
+
img = axes[i].imshow(feature_values,
|
|
253
|
+
aspect='auto',
|
|
254
|
+
origin='lower')
|
|
227
255
|
axes[i].set_ylabel(feature_name)
|
|
228
256
|
plt.colorbar(img, ax=axes[i])
|
|
229
|
-
|
|
257
|
+
|
|
230
258
|
axes[i].set_title(f'{feature_name} 特征')
|
|
231
259
|
axes[i].grid(True, alpha=0.3)
|
|
232
|
-
|
|
260
|
+
|
|
233
261
|
axes[-1].set_xlabel('时间 (s)')
|
|
234
262
|
plt.suptitle(title)
|
|
235
263
|
plt.tight_layout()
|
|
236
264
|
return fig
|
|
237
|
-
|
|
238
|
-
def plot_statistics_distribution(self,
|
|
239
|
-
|
|
265
|
+
|
|
266
|
+
def plot_statistics_distribution(self,
|
|
267
|
+
stats_dict: Dict[str, List[float]],
|
|
268
|
+
title: str = "统计分布图") -> plt.Figure:
|
|
240
269
|
"""
|
|
241
270
|
绘制统计分布图
|
|
242
|
-
|
|
271
|
+
|
|
243
272
|
Args:
|
|
244
273
|
stats_dict: 统计数据字典
|
|
245
274
|
title: 图标题
|
|
246
|
-
|
|
275
|
+
|
|
247
276
|
Returns:
|
|
248
277
|
图形对象
|
|
249
278
|
"""
|
|
250
279
|
n_stats = len(stats_dict)
|
|
251
|
-
fig, axes = plt.subplots(2, (n_stats + 1) // 2,
|
|
252
|
-
|
|
280
|
+
fig, axes = plt.subplots(2, (n_stats + 1) // 2,
|
|
281
|
+
figsize=(self.figsize[0], self.figsize[1]))
|
|
282
|
+
|
|
253
283
|
if n_stats == 1:
|
|
254
284
|
axes = [axes]
|
|
255
285
|
elif n_stats == 2:
|
|
256
286
|
axes = axes.flatten()
|
|
257
287
|
else:
|
|
258
288
|
axes = axes.flatten()
|
|
259
|
-
|
|
289
|
+
|
|
260
290
|
for i, (stat_name, values) in enumerate(stats_dict.items()):
|
|
261
291
|
if i >= len(axes):
|
|
262
292
|
break
|
|
263
|
-
|
|
293
|
+
|
|
264
294
|
# 绘制直方图和KDE
|
|
265
|
-
axes[i].hist(values,
|
|
266
|
-
|
|
295
|
+
axes[i].hist(values,
|
|
296
|
+
bins=30,
|
|
297
|
+
alpha=0.7,
|
|
298
|
+
density=True,
|
|
299
|
+
color='skyblue')
|
|
300
|
+
|
|
267
301
|
try:
|
|
268
302
|
sns.kdeplot(values, ax=axes[i], color='red')
|
|
269
303
|
except:
|
|
270
304
|
pass
|
|
271
|
-
|
|
305
|
+
|
|
272
306
|
axes[i].set_title(f'{stat_name} 分布')
|
|
273
307
|
axes[i].set_xlabel(stat_name)
|
|
274
308
|
axes[i].set_ylabel('密度')
|
|
275
309
|
axes[i].grid(True, alpha=0.3)
|
|
276
|
-
|
|
310
|
+
|
|
277
311
|
# 隐藏未使用的子图
|
|
278
312
|
for j in range(i + 1, len(axes)):
|
|
279
313
|
axes[j].set_visible(False)
|
|
280
|
-
|
|
314
|
+
|
|
281
315
|
plt.suptitle(title)
|
|
282
316
|
plt.tight_layout()
|
|
283
317
|
return fig
|
|
284
|
-
|
|
285
|
-
def plot_rms_distribution(self,
|
|
286
|
-
|
|
318
|
+
|
|
319
|
+
def plot_rms_distribution(self,
|
|
320
|
+
rms_values: List[float],
|
|
321
|
+
title: str = "RMS分布图") -> plt.Figure:
|
|
287
322
|
"""
|
|
288
323
|
绘制RMS分布图
|
|
289
|
-
|
|
324
|
+
|
|
290
325
|
Args:
|
|
291
326
|
rms_values: RMS值列表
|
|
292
327
|
title: 图标题
|
|
293
|
-
|
|
328
|
+
|
|
294
329
|
Returns:
|
|
295
330
|
图形对象
|
|
296
331
|
"""
|
|
297
332
|
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=self.figsize)
|
|
298
|
-
|
|
333
|
+
|
|
299
334
|
# 线性尺度分布
|
|
300
|
-
ax1.hist(rms_values,
|
|
335
|
+
ax1.hist(rms_values,
|
|
336
|
+
bins=50,
|
|
337
|
+
alpha=0.7,
|
|
338
|
+
color='lightblue',
|
|
339
|
+
edgecolor='black')
|
|
301
340
|
ax1.set_xlabel('RMS 幅度')
|
|
302
341
|
ax1.set_ylabel('频次')
|
|
303
342
|
ax1.set_title('RMS 线性分布')
|
|
304
343
|
ax1.grid(True, alpha=0.3)
|
|
305
|
-
|
|
344
|
+
|
|
306
345
|
# 对数尺度分布
|
|
307
346
|
rms_db = [20 * np.log10(rms + 1e-10) for rms in rms_values]
|
|
308
|
-
ax2.hist(rms_db,
|
|
347
|
+
ax2.hist(rms_db,
|
|
348
|
+
bins=50,
|
|
349
|
+
alpha=0.7,
|
|
350
|
+
color='lightgreen',
|
|
351
|
+
edgecolor='black')
|
|
309
352
|
ax2.set_xlabel('RMS (dB)')
|
|
310
353
|
ax2.set_ylabel('频次')
|
|
311
354
|
ax2.set_title('RMS 对数分布')
|
|
312
355
|
ax2.grid(True, alpha=0.3)
|
|
313
|
-
|
|
356
|
+
|
|
314
357
|
plt.suptitle(title)
|
|
315
358
|
plt.tight_layout()
|
|
316
359
|
return fig
|
|
317
|
-
|
|
318
|
-
def plot_audio_comparison(self,
|
|
319
|
-
|
|
360
|
+
|
|
361
|
+
def plot_audio_comparison(self,
|
|
362
|
+
audio1: np.ndarray,
|
|
363
|
+
audio2: np.ndarray,
|
|
364
|
+
labels: List[str] = None,
|
|
365
|
+
title: str = "音频对比") -> plt.Figure:
|
|
320
366
|
"""
|
|
321
367
|
绘制两个音频的对比图
|
|
322
|
-
|
|
368
|
+
|
|
323
369
|
Args:
|
|
324
370
|
audio1: 第一个音频
|
|
325
371
|
audio2: 第二个音频
|
|
326
372
|
labels: 标签列表
|
|
327
373
|
title: 图标题
|
|
328
|
-
|
|
374
|
+
|
|
329
375
|
Returns:
|
|
330
376
|
图形对象
|
|
331
377
|
"""
|
|
332
378
|
if labels is None:
|
|
333
379
|
labels = ['音频1', '音频2']
|
|
334
|
-
|
|
335
|
-
fig, axes = plt.subplots(3,
|
|
336
|
-
|
|
380
|
+
|
|
381
|
+
fig, axes = plt.subplots(3,
|
|
382
|
+
2,
|
|
383
|
+
figsize=(self.figsize[0],
|
|
384
|
+
self.figsize[1] * 1.5))
|
|
385
|
+
|
|
337
386
|
# 时域波形对比
|
|
338
387
|
time1 = np.linspace(0, len(audio1) / self.sr, len(audio1))
|
|
339
388
|
time2 = np.linspace(0, len(audio2) / self.sr, len(audio2))
|
|
340
|
-
|
|
389
|
+
|
|
341
390
|
axes[0, 0].plot(time1, audio1, alpha=0.8)
|
|
342
391
|
axes[0, 0].set_title(f'{labels[0]} - 波形')
|
|
343
392
|
axes[0, 0].set_xlabel('时间 (s)')
|
|
344
393
|
axes[0, 0].set_ylabel('幅度')
|
|
345
394
|
axes[0, 0].grid(True, alpha=0.3)
|
|
346
|
-
|
|
395
|
+
|
|
347
396
|
axes[0, 1].plot(time2, audio2, alpha=0.8, color='orange')
|
|
348
397
|
axes[0, 1].set_title(f'{labels[1]} - 波形')
|
|
349
398
|
axes[0, 1].set_xlabel('时间 (s)')
|
|
350
399
|
axes[0, 1].set_ylabel('幅度')
|
|
351
400
|
axes[0, 1].grid(True, alpha=0.3)
|
|
352
|
-
|
|
401
|
+
|
|
353
402
|
# 频谱对比
|
|
354
403
|
self.plot_spectrum(audio1, f'{labels[0]} - 频谱', ax=axes[1, 0])
|
|
355
404
|
self.plot_spectrum(audio2, f'{labels[1]} - 频谱', ax=axes[1, 1])
|
|
356
|
-
|
|
405
|
+
|
|
357
406
|
# 频谱图对比
|
|
358
407
|
self.plot_spectrogram(audio1, f'{labels[0]} - 频谱图', ax=axes[2, 0])
|
|
359
408
|
self.plot_spectrogram(audio2, f'{labels[1]} - 频谱图', ax=axes[2, 1])
|
|
360
|
-
|
|
409
|
+
|
|
361
410
|
plt.suptitle(title)
|
|
362
411
|
plt.tight_layout()
|
|
363
412
|
return fig
|
|
364
413
|
|
|
365
414
|
|
|
366
|
-
def plot_dataset_overview(file_paths: List[str],
|
|
367
|
-
|
|
415
|
+
def plot_dataset_overview(file_paths: List[str],
|
|
416
|
+
max_files: int = 10,
|
|
417
|
+
sr: int = 22050) -> plt.Figure:
|
|
368
418
|
"""
|
|
369
419
|
绘制数据集概览
|
|
370
|
-
|
|
420
|
+
|
|
371
421
|
Args:
|
|
372
422
|
file_paths: 音频文件路径列表
|
|
373
423
|
max_files: 最大显示文件数
|
|
374
424
|
sr: 采样率
|
|
375
|
-
|
|
425
|
+
|
|
376
426
|
Returns:
|
|
377
427
|
图形对象
|
|
378
428
|
"""
|
|
379
429
|
visualizer = AudioVisualizer(sr=sr)
|
|
380
|
-
|
|
430
|
+
|
|
381
431
|
# 限制文件数量
|
|
382
432
|
selected_files = file_paths[:max_files]
|
|
383
|
-
|
|
384
|
-
fig, axes = plt.subplots(len(selected_files),
|
|
385
|
-
|
|
386
|
-
|
|
433
|
+
|
|
434
|
+
fig, axes = plt.subplots(len(selected_files),
|
|
435
|
+
2,
|
|
436
|
+
figsize=(15, 3 * len(selected_files)))
|
|
437
|
+
|
|
387
438
|
if len(selected_files) == 1:
|
|
388
439
|
axes = axes.reshape(1, -1)
|
|
389
|
-
|
|
440
|
+
|
|
390
441
|
for i, file_path in enumerate(selected_files):
|
|
391
442
|
try:
|
|
392
443
|
audio, _ = librosa.load(file_path, sr=sr)
|
|
393
|
-
|
|
444
|
+
|
|
394
445
|
# 波形图
|
|
395
|
-
visualizer.plot_waveform(audio, f'文件 {i+1}: 波形', ax=axes[i, 0])
|
|
396
|
-
|
|
446
|
+
visualizer.plot_waveform(audio, f'文件 {i + 1}: 波形', ax=axes[i, 0])
|
|
447
|
+
|
|
397
448
|
# 频谱图
|
|
398
|
-
visualizer.plot_spectrogram(audio, f'文件 {i+1}: 频谱图', ax=axes[i, 1])
|
|
399
|
-
|
|
449
|
+
visualizer.plot_spectrogram(audio, f'文件 {i + 1}: 频谱图', ax=axes[i, 1])
|
|
450
|
+
|
|
400
451
|
except Exception as e:
|
|
401
|
-
axes[i, 0].text(0.5,
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
452
|
+
axes[i, 0].text(0.5,
|
|
453
|
+
0.5,
|
|
454
|
+
f'加载失败: {str(e)}',
|
|
455
|
+
ha='center',
|
|
456
|
+
va='center',
|
|
457
|
+
transform=axes[i, 0].transAxes)
|
|
458
|
+
axes[i, 1].text(0.5,
|
|
459
|
+
0.5,
|
|
460
|
+
f'加载失败: {str(e)}',
|
|
461
|
+
ha='center',
|
|
462
|
+
va='center',
|
|
463
|
+
transform=axes[i, 1].transAxes)
|
|
464
|
+
|
|
406
465
|
plt.suptitle('数据集概览')
|
|
407
466
|
plt.tight_layout()
|
|
408
467
|
return fig
|
|
409
468
|
|
|
410
469
|
|
|
411
|
-
def create_analysis_dashboard(audio: np.ndarray,
|
|
470
|
+
def create_analysis_dashboard(audio: np.ndarray,
|
|
471
|
+
sr: int = 22050) -> plt.Figure:
|
|
412
472
|
"""
|
|
413
473
|
创建音频分析仪表板
|
|
414
|
-
|
|
474
|
+
|
|
415
475
|
Args:
|
|
416
476
|
audio: 音频信号
|
|
417
477
|
sr: 采样率
|
|
418
|
-
|
|
478
|
+
|
|
419
479
|
Returns:
|
|
420
480
|
仪表板图形对象
|
|
421
481
|
"""
|
|
422
482
|
visualizer = AudioVisualizer(sr=sr)
|
|
423
|
-
|
|
483
|
+
|
|
424
484
|
fig = plt.figure(figsize=(16, 12))
|
|
425
|
-
|
|
485
|
+
|
|
426
486
|
# 创建网格布局
|
|
427
487
|
gs = fig.add_gridspec(3, 3, hspace=0.3, wspace=0.3)
|
|
428
|
-
|
|
488
|
+
|
|
429
489
|
# 时域波形
|
|
430
490
|
ax1 = fig.add_subplot(gs[0, :])
|
|
431
491
|
visualizer.plot_waveform(audio, "时域波形", ax=ax1)
|
|
432
|
-
|
|
492
|
+
|
|
433
493
|
# 频谱图
|
|
434
494
|
ax2 = fig.add_subplot(gs[1, :2])
|
|
435
495
|
visualizer.plot_spectrogram(audio, "频谱图", ax=ax2)
|
|
436
|
-
|
|
496
|
+
|
|
437
497
|
# 频谱
|
|
438
498
|
ax3 = fig.add_subplot(gs[1, 2])
|
|
439
499
|
visualizer.plot_spectrum(audio, "频谱", ax=ax3)
|
|
440
|
-
|
|
500
|
+
|
|
441
501
|
# 梅尔频谱图
|
|
442
502
|
ax4 = fig.add_subplot(gs[2, :2])
|
|
443
503
|
visualizer.plot_mel_spectrogram(audio, "梅尔频谱图", ax=ax4)
|
|
444
|
-
|
|
504
|
+
|
|
445
505
|
# 特征统计
|
|
446
506
|
ax5 = fig.add_subplot(gs[2, 2])
|
|
447
|
-
|
|
507
|
+
|
|
448
508
|
# 计算基本统计
|
|
449
509
|
duration = len(audio) / sr
|
|
450
510
|
max_amp = np.max(np.abs(audio))
|
|
451
|
-
rms_amp = np.sqrt(np.mean(audio
|
|
452
|
-
|
|
511
|
+
rms_amp = np.sqrt(np.mean(audio**2))
|
|
512
|
+
|
|
453
513
|
stats_text = f"""音频统计信息:
|
|
454
|
-
时长: {duration:.2f}s
|
|
455
|
-
最大幅度: {max_amp:.4f}
|
|
456
|
-
RMS: {rms_amp:.4f}
|
|
457
|
-
RMS (dB): {20*np.log10(rms_amp):.2f}
|
|
458
|
-
采样率: {sr} Hz
|
|
459
|
-
样本数: {len(audio)}
|
|
460
|
-
"""
|
|
461
|
-
|
|
462
|
-
ax5.text(0.1,
|
|
463
|
-
|
|
514
|
+
时长: {duration:.2f}s
|
|
515
|
+
最大幅度: {max_amp:.4f}
|
|
516
|
+
RMS: {rms_amp:.4f}
|
|
517
|
+
RMS (dB): {20 * np.log10(rms_amp):.2f}
|
|
518
|
+
采样率: {sr} Hz
|
|
519
|
+
样本数: {len(audio)}
|
|
520
|
+
"""
|
|
521
|
+
|
|
522
|
+
ax5.text(0.1,
|
|
523
|
+
0.5,
|
|
524
|
+
stats_text,
|
|
525
|
+
transform=ax5.transAxes,
|
|
526
|
+
fontsize=10,
|
|
527
|
+
verticalalignment='center')
|
|
464
528
|
ax5.set_title("统计信息")
|
|
465
529
|
ax5.axis('off')
|
|
466
|
-
|
|
530
|
+
|
|
467
531
|
plt.suptitle("音频分析仪表板", fontsize=16)
|
|
468
|
-
return fig
|
|
532
|
+
return fig
|