neverlib-0.2.5-py3-none-any.whl → neverlib-0.2.7-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. neverlib/.claude/settings.local.json +9 -0
  2. neverlib/Docs/audio_aug/test_volume.ipynb +416 -0
  3. neverlib/Docs/audio_aug_test/test_volume.ipynb +289 -0
  4. neverlib/Docs/filter/biquad.ipynb +129 -0
  5. neverlib/Docs/filter/filter_family.ipynb +450 -0
  6. neverlib/Docs/filter/highpass.ipynb +139 -0
  7. neverlib/Docs/filter/scipy_filter_family.ipynb +110 -0
  8. neverlib/Docs/vad/VAD_Energy.ipynb +167 -0
  9. neverlib/Docs/vad/VAD_Silero.ipynb +325 -0
  10. neverlib/Docs/vad/VAD_WebRTC.ipynb +189 -0
  11. neverlib/Docs/vad/VAD_funasr.ipynb +192 -0
  12. neverlib/Docs/vad/VAD_rvADfast.ipynb +162 -0
  13. neverlib/Docs/vad/VAD_statistics.ipynb +532 -0
  14. neverlib/Docs/vad/VAD_tenVAD.ipynb +292 -0
  15. neverlib/Docs/vad/VAD_vadlib.ipynb +168 -0
  16. neverlib/Docs/vad/VAD_whisper.ipynb +404 -0
  17. neverlib/QA/gen_init.py +117 -0
  18. neverlib/QA/get_fun.py +19 -0
  19. neverlib/__init__.py +21 -4
  20. neverlib/audio_aug/HarmonicDistortion.py +19 -13
  21. neverlib/audio_aug/__init__.py +30 -12
  22. neverlib/audio_aug/audio_aug.py +19 -14
  23. neverlib/audio_aug/clip_aug.py +15 -18
  24. neverlib/audio_aug/coder_aug.py +44 -24
  25. neverlib/audio_aug/coder_aug2.py +54 -37
  26. neverlib/audio_aug/loss_packet_aug.py +7 -7
  27. neverlib/audio_aug/quant_aug.py +19 -17
  28. neverlib/data/000_short_enhance.wav +0 -0
  29. neverlib/data/3956_speech.wav +0 -0
  30. neverlib/data/3956_sweep.wav +0 -0
  31. neverlib/data/vad_example.wav +0 -0
  32. neverlib/data/white.wav +0 -0
  33. neverlib/data/white_EQ.wav +0 -0
  34. neverlib/data/white_matched.wav +0 -0
  35. neverlib/data_analyze/__init__.py +25 -20
  36. neverlib/data_analyze/dataset_analyzer.py +109 -114
  37. neverlib/data_analyze/quality_metrics.py +87 -89
  38. neverlib/data_analyze/rms_distrubution.py +23 -42
  39. neverlib/data_analyze/spectral_analysis.py +43 -46
  40. neverlib/data_analyze/statistics.py +76 -76
  41. neverlib/data_analyze/temporal_features.py +15 -6
  42. neverlib/data_analyze/visualization.py +208 -144
  43. neverlib/filter/__init__.py +17 -20
  44. neverlib/filter/auto_eq/__init__.py +18 -35
  45. neverlib/filter/auto_eq/de_eq.py +0 -2
  46. neverlib/filter/common.py +24 -5
  47. neverlib/metrics/DNSMOS/bak_ovr.onnx +0 -0
  48. neverlib/metrics/DNSMOS/model_v8.onnx +0 -0
  49. neverlib/metrics/DNSMOS/sig.onnx +0 -0
  50. neverlib/metrics/DNSMOS/sig_bak_ovr.onnx +0 -0
  51. neverlib/metrics/__init__.py +23 -0
  52. neverlib/metrics/dnsmos.py +4 -15
  53. neverlib/metrics/pDNSMOS/sig_bak_ovr.onnx +0 -0
  54. neverlib/metrics/pesq_c/PESQ +0 -0
  55. neverlib/metrics/pesq_c/dsp.c +553 -0
  56. neverlib/metrics/pesq_c/dsp.h +138 -0
  57. neverlib/metrics/pesq_c/pesq.h +294 -0
  58. neverlib/metrics/pesq_c/pesqdsp.c +1047 -0
  59. neverlib/metrics/pesq_c/pesqio.c +392 -0
  60. neverlib/metrics/pesq_c/pesqmain.c +610 -0
  61. neverlib/metrics/pesq_c/pesqmod.c +1417 -0
  62. neverlib/metrics/pesq_c/pesqpar.h +297 -0
  63. neverlib/metrics/snr.py +5 -1
  64. neverlib/metrics/spec.py +31 -21
  65. neverlib/metrics/test_pesq.py +0 -4
  66. neverlib/tests/test_imports.py +17 -0
  67. neverlib/utils/__init__.py +26 -15
  68. neverlib/utils/audio_split.py +5 -1
  69. neverlib/utils/checkGPU.py +17 -9
  70. neverlib/utils/lazy_expose.py +29 -0
  71. neverlib/utils/utils.py +40 -12
  72. neverlib/vad/__init__.py +33 -25
  73. neverlib/vad/class_get_speech.py +1 -1
  74. neverlib/vad/class_vad.py +3 -3
  75. neverlib/vad/img.png +0 -0
  76. {neverlib-0.2.5.dist-info → neverlib-0.2.7.dist-info}/METADATA +20 -17
  77. {neverlib-0.2.5.dist-info → neverlib-0.2.7.dist-info}/RECORD +80 -37
  78. {neverlib-0.2.5.dist-info → neverlib-0.2.7.dist-info}/WHEEL +0 -0
  79. {neverlib-0.2.5.dist-info → neverlib-0.2.7.dist-info}/licenses/LICENSE +0 -0
  80. {neverlib-0.2.5.dist-info → neverlib-0.2.7.dist-info}/top_level.txt +0 -0
@@ -18,37 +18,40 @@ from scipy.signal import spectrogram
18
18
 
19
19
  class AudioVisualizer:
20
20
  """音频可视化器类"""
21
-
21
+
22
22
  def __init__(self, sr: int = 22050, figsize: Tuple[int, int] = (12, 8)):
23
23
  """
24
24
  初始化可视化器
25
-
25
+
26
26
  Args:
27
27
  sr: 采样率
28
28
  figsize: 图形大小
29
29
  """
30
30
  self.sr = sr
31
31
  self.figsize = figsize
32
-
32
+
33
33
  # 设置中文字体支持
34
34
  plt.rcParams['font.sans-serif'] = ['SimHei', 'DejaVu Sans']
35
35
  plt.rcParams['axes.unicode_minus'] = False
36
-
36
+
37
37
  # 设置样式
38
38
  plt.style.use('default')
39
39
  sns.set_palette("husl")
40
-
41
- def plot_waveform(self, audio: np.ndarray, title: str = "音频波形图",
42
- show_time: bool = True, ax: Optional[plt.Axes] = None) -> plt.Figure:
40
+
41
+ def plot_waveform(self,
42
+ audio: np.ndarray,
43
+ title: str = "音频波形图",
44
+ show_time: bool = True,
45
+ ax: Optional[plt.Axes] = None) -> plt.Figure:
43
46
  """
44
47
  绘制音频波形图
45
-
48
+
46
49
  Args:
47
50
  audio: 音频信号
48
51
  title: 图标题
49
52
  show_time: 是否显示时间轴
50
53
  ax: matplotlib轴对象
51
-
54
+
52
55
  Returns:
53
56
  图形对象
54
57
  """
@@ -56,7 +59,7 @@ class AudioVisualizer:
56
59
  fig, ax = plt.subplots(figsize=self.figsize)
57
60
  else:
58
61
  fig = ax.figure
59
-
62
+
60
63
  if show_time:
61
64
  time_axis = np.linspace(0, len(audio) / self.sr, len(audio))
62
65
  ax.plot(time_axis, audio, linewidth=0.5, alpha=0.8)
@@ -64,30 +67,33 @@ class AudioVisualizer:
64
67
  else:
65
68
  ax.plot(audio, linewidth=0.5, alpha=0.8)
66
69
  ax.set_xlabel('样本点')
67
-
70
+
68
71
  ax.set_ylabel('幅度')
69
72
  ax.set_title(title)
70
73
  ax.grid(True, alpha=0.3)
71
-
74
+
72
75
  # 添加零线
73
76
  ax.axhline(y=0, color='red', linestyle='--', alpha=0.5)
74
-
77
+
75
78
  plt.tight_layout()
76
79
  return fig
77
-
78
- def plot_spectrogram(self, audio: np.ndarray, title: str = "频谱图",
79
- n_fft: int = 2048, hop_length: int = 512,
80
- ax: Optional[plt.Axes] = None) -> plt.Figure:
80
+
81
+ def plot_spectrogram(self,
82
+ audio: np.ndarray,
83
+ title: str = "频谱图",
84
+ n_fft: int = 2048,
85
+ hop_length: int = 512,
86
+ ax: Optional[plt.Axes] = None) -> plt.Figure:
81
87
  """
82
88
  绘制频谱图
83
-
89
+
84
90
  Args:
85
91
  audio: 音频信号
86
92
  title: 图标题
87
93
  n_fft: FFT窗口大小
88
94
  hop_length: 跳跃长度
89
95
  ax: matplotlib轴对象
90
-
96
+
91
97
  Returns:
92
98
  图形对象
93
99
  """
@@ -95,37 +101,44 @@ class AudioVisualizer:
95
101
  fig, ax = plt.subplots(figsize=self.figsize)
96
102
  else:
97
103
  fig = ax.figure
98
-
104
+
99
105
  # 计算频谱图
100
106
  D = librosa.stft(audio, n_fft=n_fft, hop_length=hop_length)
101
107
  S_db = librosa.amplitude_to_db(np.abs(D), ref=np.max)
102
-
108
+
103
109
  # 绘制
104
- img = librosa.display.specshow(S_db, sr=self.sr, hop_length=hop_length,
105
- x_axis='time', y_axis='hz', ax=ax)
106
-
110
+ img = librosa.display.specshow(S_db,
111
+ sr=self.sr,
112
+ hop_length=hop_length,
113
+ x_axis='time',
114
+ y_axis='hz',
115
+ ax=ax)
116
+
107
117
  ax.set_title(title)
108
118
  ax.set_xlabel('时间 (s)')
109
119
  ax.set_ylabel('频率 (Hz)')
110
-
120
+
111
121
  # 添加颜色条
112
122
  cbar = plt.colorbar(img, ax=ax, format='%+2.0f dB')
113
123
  cbar.set_label('幅度 (dB)')
114
-
124
+
115
125
  plt.tight_layout()
116
126
  return fig
117
-
118
- def plot_mel_spectrogram(self, audio: np.ndarray, title: str = "梅尔频谱图",
119
- n_mels: int = 128, ax: Optional[plt.Axes] = None) -> plt.Figure:
127
+
128
+ def plot_mel_spectrogram(self,
129
+ audio: np.ndarray,
130
+ title: str = "梅尔频谱图",
131
+ n_mels: int = 128,
132
+ ax: Optional[plt.Axes] = None) -> plt.Figure:
120
133
  """
121
134
  绘制梅尔频谱图
122
-
135
+
123
136
  Args:
124
137
  audio: 音频信号
125
138
  title: 图标题
126
139
  n_mels: 梅尔滤波器数量
127
140
  ax: matplotlib轴对象
128
-
141
+
129
142
  Returns:
130
143
  图形对象
131
144
  """
@@ -133,37 +146,43 @@ class AudioVisualizer:
133
146
  fig, ax = plt.subplots(figsize=self.figsize)
134
147
  else:
135
148
  fig = ax.figure
136
-
149
+
137
150
  # 计算梅尔频谱图
138
151
  S = librosa.feature.melspectrogram(y=audio, sr=self.sr, n_mels=n_mels)
139
152
  S_db = librosa.power_to_db(S, ref=np.max)
140
-
153
+
141
154
  # 绘制
142
- img = librosa.display.specshow(S_db, sr=self.sr, x_axis='time',
143
- y_axis='mel', ax=ax)
144
-
155
+ img = librosa.display.specshow(S_db,
156
+ sr=self.sr,
157
+ x_axis='time',
158
+ y_axis='mel',
159
+ ax=ax)
160
+
145
161
  ax.set_title(title)
146
162
  ax.set_xlabel('时间 (s)')
147
163
  ax.set_ylabel('梅尔频率')
148
-
164
+
149
165
  # 添加颜色条
150
166
  cbar = plt.colorbar(img, ax=ax, format='%+2.0f dB')
151
167
  cbar.set_label('功率 (dB)')
152
-
168
+
153
169
  plt.tight_layout()
154
170
  return fig
155
-
156
- def plot_spectrum(self, audio: np.ndarray, title: str = "频谱",
157
- log_scale: bool = True, ax: Optional[plt.Axes] = None) -> plt.Figure:
171
+
172
+ def plot_spectrum(self,
173
+ audio: np.ndarray,
174
+ title: str = "频谱",
175
+ log_scale: bool = True,
176
+ ax: Optional[plt.Axes] = None) -> plt.Figure:
158
177
  """
159
178
  绘制频谱
160
-
179
+
161
180
  Args:
162
181
  audio: 音频信号
163
182
  title: 图标题
164
183
  log_scale: 是否使用对数刻度
165
184
  ax: matplotlib轴对象
166
-
185
+
167
186
  Returns:
168
187
  图形对象
169
188
  """
@@ -171,17 +190,17 @@ class AudioVisualizer:
171
190
  fig, ax = plt.subplots(figsize=self.figsize)
172
191
  else:
173
192
  fig = ax.figure
174
-
193
+
175
194
  # 计算FFT
176
195
  fft_data = np.fft.fft(audio)
177
196
  magnitude = np.abs(fft_data)
178
- freqs = np.fft.fftfreq(len(audio), 1/self.sr)
179
-
197
+ freqs = np.fft.fftfreq(len(audio), 1 / self.sr)
198
+
180
199
  # 只取正频率部分
181
200
  positive_idx = freqs >= 0
182
201
  freqs = freqs[positive_idx]
183
202
  magnitude = magnitude[positive_idx]
184
-
203
+
185
204
  if log_scale:
186
205
  magnitude_db = 20 * np.log10(magnitude + 1e-10)
187
206
  ax.plot(freqs, magnitude_db)
@@ -189,280 +208,325 @@ class AudioVisualizer:
189
208
  else:
190
209
  ax.plot(freqs, magnitude)
191
210
  ax.set_ylabel('幅度')
192
-
211
+
193
212
  ax.set_xlabel('频率 (Hz)')
194
213
  ax.set_title(title)
195
214
  ax.grid(True, alpha=0.3)
196
-
215
+
197
216
  plt.tight_layout()
198
217
  return fig
199
-
200
- def plot_features_comparison(self, features_dict: Dict[str, np.ndarray],
201
- title: str = "特征对比") -> plt.Figure:
218
+
219
+ def plot_features_comparison(self,
220
+ features_dict: Dict[str, np.ndarray],
221
+ title: str = "特征对比") -> plt.Figure:
202
222
  """
203
223
  绘制多个特征的对比图
204
-
224
+
205
225
  Args:
206
226
  features_dict: 特征字典 {特征名: 特征值数组}
207
227
  title: 图标题
208
-
228
+
209
229
  Returns:
210
230
  图形对象
211
231
  """
212
232
  n_features = len(features_dict)
213
- fig, axes = plt.subplots(n_features, 1, figsize=(self.figsize[0], self.figsize[1] * n_features / 2))
214
-
233
+ fig, axes = plt.subplots(n_features,
234
+ 1,
235
+ figsize=(self.figsize[0],
236
+ self.figsize[1] * n_features / 2))
237
+
215
238
  if n_features == 1:
216
239
  axes = [axes]
217
-
218
- for i, (feature_name, feature_values) in enumerate(features_dict.items()):
240
+
241
+ for i, (feature_name,
242
+ feature_values) in enumerate(features_dict.items()):
219
243
  if len(feature_values.shape) == 1:
220
244
  # 一维特征
221
- time_axis = np.linspace(0, len(feature_values) / (self.sr / 512), len(feature_values))
245
+ time_axis = np.linspace(0,
246
+ len(feature_values) / (self.sr / 512),
247
+ len(feature_values))
222
248
  axes[i].plot(time_axis, feature_values)
223
249
  axes[i].set_ylabel(feature_name)
224
250
  else:
225
251
  # 二维特征(如MFCC)
226
- img = axes[i].imshow(feature_values, aspect='auto', origin='lower')
252
+ img = axes[i].imshow(feature_values,
253
+ aspect='auto',
254
+ origin='lower')
227
255
  axes[i].set_ylabel(feature_name)
228
256
  plt.colorbar(img, ax=axes[i])
229
-
257
+
230
258
  axes[i].set_title(f'{feature_name} 特征')
231
259
  axes[i].grid(True, alpha=0.3)
232
-
260
+
233
261
  axes[-1].set_xlabel('时间 (s)')
234
262
  plt.suptitle(title)
235
263
  plt.tight_layout()
236
264
  return fig
237
-
238
- def plot_statistics_distribution(self, stats_dict: Dict[str, List[float]],
239
- title: str = "统计分布图") -> plt.Figure:
265
+
266
+ def plot_statistics_distribution(self,
267
+ stats_dict: Dict[str, List[float]],
268
+ title: str = "统计分布图") -> plt.Figure:
240
269
  """
241
270
  绘制统计分布图
242
-
271
+
243
272
  Args:
244
273
  stats_dict: 统计数据字典
245
274
  title: 图标题
246
-
275
+
247
276
  Returns:
248
277
  图形对象
249
278
  """
250
279
  n_stats = len(stats_dict)
251
- fig, axes = plt.subplots(2, (n_stats + 1) // 2, figsize=(self.figsize[0], self.figsize[1]))
252
-
280
+ fig, axes = plt.subplots(2, (n_stats + 1) // 2,
281
+ figsize=(self.figsize[0], self.figsize[1]))
282
+
253
283
  if n_stats == 1:
254
284
  axes = [axes]
255
285
  elif n_stats == 2:
256
286
  axes = axes.flatten()
257
287
  else:
258
288
  axes = axes.flatten()
259
-
289
+
260
290
  for i, (stat_name, values) in enumerate(stats_dict.items()):
261
291
  if i >= len(axes):
262
292
  break
263
-
293
+
264
294
  # 绘制直方图和KDE
265
- axes[i].hist(values, bins=30, alpha=0.7, density=True, color='skyblue')
266
-
295
+ axes[i].hist(values,
296
+ bins=30,
297
+ alpha=0.7,
298
+ density=True,
299
+ color='skyblue')
300
+
267
301
  try:
268
302
  sns.kdeplot(values, ax=axes[i], color='red')
269
303
  except:
270
304
  pass
271
-
305
+
272
306
  axes[i].set_title(f'{stat_name} 分布')
273
307
  axes[i].set_xlabel(stat_name)
274
308
  axes[i].set_ylabel('密度')
275
309
  axes[i].grid(True, alpha=0.3)
276
-
310
+
277
311
  # 隐藏未使用的子图
278
312
  for j in range(i + 1, len(axes)):
279
313
  axes[j].set_visible(False)
280
-
314
+
281
315
  plt.suptitle(title)
282
316
  plt.tight_layout()
283
317
  return fig
284
-
285
- def plot_rms_distribution(self, rms_values: List[float],
286
- title: str = "RMS分布图") -> plt.Figure:
318
+
319
+ def plot_rms_distribution(self,
320
+ rms_values: List[float],
321
+ title: str = "RMS分布图") -> plt.Figure:
287
322
  """
288
323
  绘制RMS分布图
289
-
324
+
290
325
  Args:
291
326
  rms_values: RMS值列表
292
327
  title: 图标题
293
-
328
+
294
329
  Returns:
295
330
  图形对象
296
331
  """
297
332
  fig, (ax1, ax2) = plt.subplots(1, 2, figsize=self.figsize)
298
-
333
+
299
334
  # 线性尺度分布
300
- ax1.hist(rms_values, bins=50, alpha=0.7, color='lightblue', edgecolor='black')
335
+ ax1.hist(rms_values,
336
+ bins=50,
337
+ alpha=0.7,
338
+ color='lightblue',
339
+ edgecolor='black')
301
340
  ax1.set_xlabel('RMS 幅度')
302
341
  ax1.set_ylabel('频次')
303
342
  ax1.set_title('RMS 线性分布')
304
343
  ax1.grid(True, alpha=0.3)
305
-
344
+
306
345
  # 对数尺度分布
307
346
  rms_db = [20 * np.log10(rms + 1e-10) for rms in rms_values]
308
- ax2.hist(rms_db, bins=50, alpha=0.7, color='lightgreen', edgecolor='black')
347
+ ax2.hist(rms_db,
348
+ bins=50,
349
+ alpha=0.7,
350
+ color='lightgreen',
351
+ edgecolor='black')
309
352
  ax2.set_xlabel('RMS (dB)')
310
353
  ax2.set_ylabel('频次')
311
354
  ax2.set_title('RMS 对数分布')
312
355
  ax2.grid(True, alpha=0.3)
313
-
356
+
314
357
  plt.suptitle(title)
315
358
  plt.tight_layout()
316
359
  return fig
317
-
318
- def plot_audio_comparison(self, audio1: np.ndarray, audio2: np.ndarray,
319
- labels: List[str] = None, title: str = "音频对比") -> plt.Figure:
360
+
361
+ def plot_audio_comparison(self,
362
+ audio1: np.ndarray,
363
+ audio2: np.ndarray,
364
+ labels: List[str] = None,
365
+ title: str = "音频对比") -> plt.Figure:
320
366
  """
321
367
  绘制两个音频的对比图
322
-
368
+
323
369
  Args:
324
370
  audio1: 第一个音频
325
371
  audio2: 第二个音频
326
372
  labels: 标签列表
327
373
  title: 图标题
328
-
374
+
329
375
  Returns:
330
376
  图形对象
331
377
  """
332
378
  if labels is None:
333
379
  labels = ['音频1', '音频2']
334
-
335
- fig, axes = plt.subplots(3, 2, figsize=(self.figsize[0], self.figsize[1] * 1.5))
336
-
380
+
381
+ fig, axes = plt.subplots(3,
382
+ 2,
383
+ figsize=(self.figsize[0],
384
+ self.figsize[1] * 1.5))
385
+
337
386
  # 时域波形对比
338
387
  time1 = np.linspace(0, len(audio1) / self.sr, len(audio1))
339
388
  time2 = np.linspace(0, len(audio2) / self.sr, len(audio2))
340
-
389
+
341
390
  axes[0, 0].plot(time1, audio1, alpha=0.8)
342
391
  axes[0, 0].set_title(f'{labels[0]} - 波形')
343
392
  axes[0, 0].set_xlabel('时间 (s)')
344
393
  axes[0, 0].set_ylabel('幅度')
345
394
  axes[0, 0].grid(True, alpha=0.3)
346
-
395
+
347
396
  axes[0, 1].plot(time2, audio2, alpha=0.8, color='orange')
348
397
  axes[0, 1].set_title(f'{labels[1]} - 波形')
349
398
  axes[0, 1].set_xlabel('时间 (s)')
350
399
  axes[0, 1].set_ylabel('幅度')
351
400
  axes[0, 1].grid(True, alpha=0.3)
352
-
401
+
353
402
  # 频谱对比
354
403
  self.plot_spectrum(audio1, f'{labels[0]} - 频谱', ax=axes[1, 0])
355
404
  self.plot_spectrum(audio2, f'{labels[1]} - 频谱', ax=axes[1, 1])
356
-
405
+
357
406
  # 频谱图对比
358
407
  self.plot_spectrogram(audio1, f'{labels[0]} - 频谱图', ax=axes[2, 0])
359
408
  self.plot_spectrogram(audio2, f'{labels[1]} - 频谱图', ax=axes[2, 1])
360
-
409
+
361
410
  plt.suptitle(title)
362
411
  plt.tight_layout()
363
412
  return fig
364
413
 
365
414
 
366
- def plot_dataset_overview(file_paths: List[str], max_files: int = 10,
367
- sr: int = 22050) -> plt.Figure:
415
+ def plot_dataset_overview(file_paths: List[str],
416
+ max_files: int = 10,
417
+ sr: int = 22050) -> plt.Figure:
368
418
  """
369
419
  绘制数据集概览
370
-
420
+
371
421
  Args:
372
422
  file_paths: 音频文件路径列表
373
423
  max_files: 最大显示文件数
374
424
  sr: 采样率
375
-
425
+
376
426
  Returns:
377
427
  图形对象
378
428
  """
379
429
  visualizer = AudioVisualizer(sr=sr)
380
-
430
+
381
431
  # 限制文件数量
382
432
  selected_files = file_paths[:max_files]
383
-
384
- fig, axes = plt.subplots(len(selected_files), 2,
385
- figsize=(15, 3 * len(selected_files)))
386
-
433
+
434
+ fig, axes = plt.subplots(len(selected_files),
435
+ 2,
436
+ figsize=(15, 3 * len(selected_files)))
437
+
387
438
  if len(selected_files) == 1:
388
439
  axes = axes.reshape(1, -1)
389
-
440
+
390
441
  for i, file_path in enumerate(selected_files):
391
442
  try:
392
443
  audio, _ = librosa.load(file_path, sr=sr)
393
-
444
+
394
445
  # 波形图
395
- visualizer.plot_waveform(audio, f'文件 {i+1}: 波形', ax=axes[i, 0])
396
-
446
+ visualizer.plot_waveform(audio, f'文件 {i + 1}: 波形', ax=axes[i, 0])
447
+
397
448
  # 频谱图
398
- visualizer.plot_spectrogram(audio, f'文件 {i+1}: 频谱图', ax=axes[i, 1])
399
-
449
+ visualizer.plot_spectrogram(audio, f'文件 {i + 1}: 频谱图', ax=axes[i, 1])
450
+
400
451
  except Exception as e:
401
- axes[i, 0].text(0.5, 0.5, f'加载失败: {str(e)}',
402
- ha='center', va='center', transform=axes[i, 0].transAxes)
403
- axes[i, 1].text(0.5, 0.5, f'加载失败: {str(e)}',
404
- ha='center', va='center', transform=axes[i, 1].transAxes)
405
-
452
+ axes[i, 0].text(0.5,
453
+ 0.5,
454
+ f'加载失败: {str(e)}',
455
+ ha='center',
456
+ va='center',
457
+ transform=axes[i, 0].transAxes)
458
+ axes[i, 1].text(0.5,
459
+ 0.5,
460
+ f'加载失败: {str(e)}',
461
+ ha='center',
462
+ va='center',
463
+ transform=axes[i, 1].transAxes)
464
+
406
465
  plt.suptitle('数据集概览')
407
466
  plt.tight_layout()
408
467
  return fig
409
468
 
410
469
 
411
- def create_analysis_dashboard(audio: np.ndarray, sr: int = 22050) -> plt.Figure:
470
+ def create_analysis_dashboard(audio: np.ndarray,
471
+ sr: int = 22050) -> plt.Figure:
412
472
  """
413
473
  创建音频分析仪表板
414
-
474
+
415
475
  Args:
416
476
  audio: 音频信号
417
477
  sr: 采样率
418
-
478
+
419
479
  Returns:
420
480
  仪表板图形对象
421
481
  """
422
482
  visualizer = AudioVisualizer(sr=sr)
423
-
483
+
424
484
  fig = plt.figure(figsize=(16, 12))
425
-
485
+
426
486
  # 创建网格布局
427
487
  gs = fig.add_gridspec(3, 3, hspace=0.3, wspace=0.3)
428
-
488
+
429
489
  # 时域波形
430
490
  ax1 = fig.add_subplot(gs[0, :])
431
491
  visualizer.plot_waveform(audio, "时域波形", ax=ax1)
432
-
492
+
433
493
  # 频谱图
434
494
  ax2 = fig.add_subplot(gs[1, :2])
435
495
  visualizer.plot_spectrogram(audio, "频谱图", ax=ax2)
436
-
496
+
437
497
  # 频谱
438
498
  ax3 = fig.add_subplot(gs[1, 2])
439
499
  visualizer.plot_spectrum(audio, "频谱", ax=ax3)
440
-
500
+
441
501
  # 梅尔频谱图
442
502
  ax4 = fig.add_subplot(gs[2, :2])
443
503
  visualizer.plot_mel_spectrogram(audio, "梅尔频谱图", ax=ax4)
444
-
504
+
445
505
  # 特征统计
446
506
  ax5 = fig.add_subplot(gs[2, 2])
447
-
507
+
448
508
  # 计算基本统计
449
509
  duration = len(audio) / sr
450
510
  max_amp = np.max(np.abs(audio))
451
- rms_amp = np.sqrt(np.mean(audio ** 2))
452
-
511
+ rms_amp = np.sqrt(np.mean(audio**2))
512
+
453
513
  stats_text = f"""音频统计信息:
454
- 时长: {duration:.2f}s
455
- 最大幅度: {max_amp:.4f}
456
- RMS: {rms_amp:.4f}
457
- RMS (dB): {20*np.log10(rms_amp):.2f}
458
- 采样率: {sr} Hz
459
- 样本数: {len(audio)}
460
- """
461
-
462
- ax5.text(0.1, 0.5, stats_text, transform=ax5.transAxes,
463
- fontsize=10, verticalalignment='center')
514
+ 时长: {duration:.2f}s
515
+ 最大幅度: {max_amp:.4f}
516
+ RMS: {rms_amp:.4f}
517
+ RMS (dB): {20 * np.log10(rms_amp):.2f}
518
+ 采样率: {sr} Hz
519
+ 样本数: {len(audio)}
520
+ """
521
+
522
+ ax5.text(0.1,
523
+ 0.5,
524
+ stats_text,
525
+ transform=ax5.transAxes,
526
+ fontsize=10,
527
+ verticalalignment='center')
464
528
  ax5.set_title("统计信息")
465
529
  ax5.axis('off')
466
-
530
+
467
531
  plt.suptitle("音频分析仪表板", fontsize=16)
468
- return fig
532
+ return fig