neverlib 0.2.6__py3-none-any.whl → 0.2.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. neverlib/.claude/settings.local.json +9 -0
  2. neverlib/Docs/audio_aug/test_volume.ipynb +416 -0
  3. neverlib/Docs/audio_aug_test/test_volume.ipynb +289 -0
  4. neverlib/Docs/filter/biquad.ipynb +129 -0
  5. neverlib/Docs/filter/filter_family.ipynb +450 -0
  6. neverlib/Docs/filter/highpass.ipynb +139 -0
  7. neverlib/Docs/filter/scipy_filter_family.ipynb +110 -0
  8. neverlib/Docs/vad/VAD_Energy.ipynb +167 -0
  9. neverlib/Docs/vad/VAD_Silero.ipynb +325 -0
  10. neverlib/Docs/vad/VAD_WebRTC.ipynb +189 -0
  11. neverlib/Docs/vad/VAD_funasr.ipynb +192 -0
  12. neverlib/Docs/vad/VAD_rvADfast.ipynb +162 -0
  13. neverlib/Docs/vad/VAD_statistics.ipynb +532 -0
  14. neverlib/Docs/vad/VAD_tenVAD.ipynb +292 -0
  15. neverlib/Docs/vad/VAD_vadlib.ipynb +168 -0
  16. neverlib/Docs/vad/VAD_whisper.ipynb +404 -0
  17. neverlib/QA/gen_init.py +218 -0
  18. neverlib/QA/get_fun.py +19 -0
  19. neverlib/__init__.py +40 -4
  20. neverlib/audio_aug/HarmonicDistortion.py +19 -13
  21. neverlib/audio_aug/__init__.py +82 -12
  22. neverlib/audio_aug/audio_aug.py +19 -14
  23. neverlib/audio_aug/clip_aug.py +15 -18
  24. neverlib/audio_aug/coder_aug.py +44 -24
  25. neverlib/audio_aug/coder_aug2.py +54 -37
  26. neverlib/audio_aug/loss_packet_aug.py +7 -7
  27. neverlib/audio_aug/quant_aug.py +19 -17
  28. neverlib/data/000_short_enhance.wav +0 -0
  29. neverlib/data/3956_speech.wav +0 -0
  30. neverlib/data/3956_sweep.wav +0 -0
  31. neverlib/data/vad_example.wav +0 -0
  32. neverlib/data/white.wav +0 -0
  33. neverlib/data/white_EQ.wav +0 -0
  34. neverlib/data/white_matched.wav +0 -0
  35. neverlib/data_analyze/__init__.py +69 -20
  36. neverlib/data_analyze/dataset_analyzer.py +109 -114
  37. neverlib/data_analyze/quality_metrics.py +87 -89
  38. neverlib/data_analyze/rms_distrubution.py +23 -42
  39. neverlib/data_analyze/spectral_analysis.py +43 -46
  40. neverlib/data_analyze/statistics.py +76 -76
  41. neverlib/data_analyze/temporal_features.py +15 -6
  42. neverlib/data_analyze/visualization.py +208 -144
  43. neverlib/filter/__init__.py +40 -20
  44. neverlib/filter/auto_eq/__init__.py +50 -31
  45. neverlib/filter/auto_eq/de_eq.py +0 -2
  46. neverlib/filter/common.py +24 -5
  47. neverlib/metrics/DNSMOS/bak_ovr.onnx +0 -0
  48. neverlib/metrics/DNSMOS/model_v8.onnx +0 -0
  49. neverlib/metrics/DNSMOS/sig.onnx +0 -0
  50. neverlib/metrics/DNSMOS/sig_bak_ovr.onnx +0 -0
  51. neverlib/metrics/__init__.py +59 -0
  52. neverlib/metrics/dnsmos.py +4 -15
  53. neverlib/metrics/pDNSMOS/sig_bak_ovr.onnx +0 -0
  54. neverlib/metrics/pesq_c/PESQ +0 -0
  55. neverlib/metrics/pesq_c/dsp.c +553 -0
  56. neverlib/metrics/pesq_c/dsp.h +138 -0
  57. neverlib/metrics/pesq_c/pesq.h +294 -0
  58. neverlib/metrics/pesq_c/pesqdsp.c +1047 -0
  59. neverlib/metrics/pesq_c/pesqio.c +392 -0
  60. neverlib/metrics/pesq_c/pesqmain.c +610 -0
  61. neverlib/metrics/pesq_c/pesqmod.c +1417 -0
  62. neverlib/metrics/pesq_c/pesqpar.h +297 -0
  63. neverlib/metrics/snr.py +5 -1
  64. neverlib/metrics/spec.py +31 -21
  65. neverlib/metrics/test_pesq.py +0 -4
  66. neverlib/tests/__init__.py +33 -1
  67. neverlib/tests/test_imports.py +19 -0
  68. neverlib/utils/__init__.py +71 -15
  69. neverlib/utils/audio_split.py +6 -1
  70. neverlib/utils/checkGPU.py +17 -9
  71. neverlib/utils/lazy_expose.py +29 -0
  72. neverlib/utils/utils.py +55 -12
  73. neverlib/vad/PreProcess.py +66 -66
  74. neverlib/vad/__init__.py +71 -25
  75. neverlib/vad/class_get_speech.py +1 -1
  76. neverlib/vad/class_vad.py +3 -3
  77. neverlib/vad/img.png +0 -0
  78. {neverlib-0.2.6.dist-info → neverlib-0.2.8.dist-info}/METADATA +1 -1
  79. {neverlib-0.2.6.dist-info → neverlib-0.2.8.dist-info}/RECORD +82 -39
  80. {neverlib-0.2.6.dist-info → neverlib-0.2.8.dist-info}/WHEEL +0 -0
  81. {neverlib-0.2.6.dist-info → neverlib-0.2.8.dist-info}/licenses/LICENSE +0 -0
  82. {neverlib-0.2.6.dist-info → neverlib-0.2.8.dist-info}/top_level.txt +0 -0
@@ -4,40 +4,38 @@ Audio Quality Metrics Module
4
4
 
5
5
  提供音频质量评估和失真度分析功能
6
6
  """
7
-
8
- import numpy as np
9
7
  import librosa
8
+ import numpy as np
10
9
  from scipy import signal
11
10
  from scipy.fft import fft, fftfreq
12
11
  from typing import Tuple, Optional, Union, List
13
- import warnings
14
12
 
15
13
 
16
14
  class QualityAnalyzer:
17
15
  """音频质量分析器类"""
18
-
16
+
19
17
  def __init__(self, sr: int = 22050):
20
18
  """
21
19
  初始化质量分析器
22
-
20
+
23
21
  Args:
24
22
  sr: 采样率
25
23
  """
26
24
  self.sr = sr
27
-
28
- def signal_to_noise_ratio(self, signal_audio: np.ndarray,
29
- noise_audio: Optional[np.ndarray] = None,
30
- signal_start: Optional[int] = None,
31
- signal_end: Optional[int] = None) -> float:
25
+
26
+ def signal_to_noise_ratio(self, signal_audio: np.ndarray,
27
+ noise_audio: Optional[np.ndarray] = None,
28
+ signal_start: Optional[int] = None,
29
+ signal_end: Optional[int] = None) -> float:
32
30
  """
33
31
  计算信噪比 (SNR)
34
-
32
+
35
33
  Args:
36
34
  signal_audio: 含有信号和噪声的音频
37
35
  noise_audio: 纯噪声音频(可选)
38
36
  signal_start: 信号开始位置(当噪声未单独提供时使用)
39
37
  signal_end: 信号结束位置(当噪声未单独提供时使用)
40
-
38
+
41
39
  Returns:
42
40
  SNR值(dB)
43
41
  """
@@ -49,61 +47,61 @@ class QualityAnalyzer:
49
47
  # 从音频中提取信号和噪声部分
50
48
  if signal_start is None or signal_end is None:
51
49
  raise ValueError("Must provide signal_start and signal_end when noise_audio is None")
52
-
50
+
53
51
  signal_part = signal_audio[signal_start:signal_end]
54
-
52
+
55
53
  # 假设开头和结尾是噪声
56
54
  noise_start = signal_audio[:signal_start] if signal_start > 0 else np.array([])
57
55
  noise_end = signal_audio[signal_end:] if signal_end < len(signal_audio) else np.array([])
58
56
  noise_part = np.concatenate([noise_start, noise_end]) if len(noise_start) > 0 or len(noise_end) > 0 else signal_audio[:1000]
59
-
57
+
60
58
  signal_power = np.mean(signal_part ** 2)
61
59
  noise_power = np.mean(noise_part ** 2)
62
-
60
+
63
61
  if noise_power == 0:
64
62
  return float('inf')
65
-
63
+
66
64
  snr_db = 10 * np.log10(signal_power / noise_power)
67
65
  return snr_db
68
-
69
- def total_harmonic_distortion(self, audio: np.ndarray,
70
- fundamental_freq: Optional[float] = None,
71
- num_harmonics: int = 5) -> float:
66
+
67
+ def total_harmonic_distortion(self, audio: np.ndarray,
68
+ fundamental_freq: Optional[float] = None,
69
+ num_harmonics: int = 5) -> float:
72
70
  """
73
71
  计算总谐波失真 (THD)
74
-
72
+
75
73
  Args:
76
74
  audio: 音频信号
77
75
  fundamental_freq: 基频(Hz), 如果不提供则自动检测
78
76
  num_harmonics: 考虑的谐波数量
79
-
77
+
80
78
  Returns:
81
79
  THD百分比
82
80
  """
83
81
  # 计算频谱
84
82
  spectrum = fft(audio)
85
- freqs = fftfreq(len(audio), 1/self.sr)
83
+ freqs = fftfreq(len(audio), 1 / self.sr)
86
84
  magnitude = np.abs(spectrum)
87
-
85
+
88
86
  # 只考虑正频率
89
87
  positive_idx = freqs > 0
90
88
  freqs = freqs[positive_idx]
91
89
  magnitude = magnitude[positive_idx]
92
-
90
+
93
91
  # 如果没有提供基频, 自动检测
94
92
  if fundamental_freq is None:
95
93
  fundamental_freq = freqs[np.argmax(magnitude)]
96
-
94
+
97
95
  # 找到基频和谐波的功率
98
96
  tolerance = fundamental_freq * 0.05 # 5%的容差
99
-
97
+
100
98
  # 基频功率
101
99
  fundamental_idx = np.where(np.abs(freqs - fundamental_freq) < tolerance)[0]
102
100
  if len(fundamental_idx) == 0:
103
101
  return 0.0
104
-
102
+
105
103
  fundamental_power = np.max(magnitude[fundamental_idx]) ** 2
106
-
104
+
107
105
  # 谐波功率
108
106
  harmonic_power = 0
109
107
  for h in range(2, num_harmonics + 2):
@@ -111,97 +109,97 @@ class QualityAnalyzer:
111
109
  harmonic_idx = np.where(np.abs(freqs - harmonic_freq) < tolerance)[0]
112
110
  if len(harmonic_idx) > 0:
113
111
  harmonic_power += np.max(magnitude[harmonic_idx]) ** 2
114
-
112
+
115
113
  if fundamental_power == 0:
116
114
  return 0.0
117
-
115
+
118
116
  thd = np.sqrt(harmonic_power / fundamental_power) * 100
119
117
  return thd
120
-
121
- def dynamic_range(self, audio: np.ndarray, percentile_low: float = 1,
122
- percentile_high: float = 99) -> float:
118
+
119
+ def dynamic_range(self, audio: np.ndarray, percentile_low: float = 1,
120
+ percentile_high: float = 99) -> float:
123
121
  """
124
122
  计算动态范围
125
-
123
+
126
124
  Args:
127
125
  audio: 音频信号
128
126
  percentile_low: 低百分位数
129
127
  percentile_high: 高百分位数
130
-
128
+
131
129
  Returns:
132
130
  动态范围(dB)
133
131
  """
134
132
  amplitude = np.abs(audio)
135
133
  amplitude = amplitude[amplitude > 0] # 避免log(0)
136
-
134
+
137
135
  if len(amplitude) == 0:
138
136
  return 0.0
139
-
137
+
140
138
  low_level = np.percentile(amplitude, percentile_low)
141
139
  high_level = np.percentile(amplitude, percentile_high)
142
-
140
+
143
141
  dynamic_range_db = 20 * np.log10(high_level / (low_level + 1e-10))
144
142
  return dynamic_range_db
145
-
146
- def frequency_response(self, audio: np.ndarray,
147
- reference_audio: Optional[np.ndarray] = None) -> Tuple[np.ndarray, np.ndarray]:
143
+
144
+ def frequency_response(self, audio: np.ndarray,
145
+ reference_audio: Optional[np.ndarray] = None) -> Tuple[np.ndarray, np.ndarray]:
148
146
  """
149
147
  计算频率响应特性
150
-
148
+
151
149
  Args:
152
150
  audio: 测试音频信号
153
151
  reference_audio: 参考音频信号(可选)
154
-
152
+
155
153
  Returns:
156
154
  (频率数组, 幅度响应数组)
157
155
  """
158
156
  if reference_audio is not None:
159
157
  # 计算传递函数
160
158
  freqs, h = signal.freqz_zpk(*signal.tf2zpk([1], [1]), fs=self.sr)
161
-
159
+
162
160
  # 使用互相关计算频率响应
163
161
  cross_corr = signal.correlate(audio, reference_audio, mode='full')
164
162
  auto_corr = signal.correlate(reference_audio, reference_audio, mode='full')
165
-
163
+
166
164
  # 频域除法得到传递函数
167
165
  cross_spectrum = fft(cross_corr)
168
166
  auto_spectrum = fft(auto_corr)
169
-
167
+
170
168
  h_measured = cross_spectrum / (auto_spectrum + 1e-10)
171
- freqs = fftfreq(len(h_measured), 1/self.sr)
172
-
169
+ freqs = fftfreq(len(h_measured), 1 / self.sr)
170
+
173
171
  # 只取正频率部分
174
172
  positive_idx = freqs >= 0
175
173
  freqs = freqs[positive_idx]
176
174
  h_measured = h_measured[positive_idx]
177
-
175
+
178
176
  return freqs, np.abs(h_measured)
179
177
  else:
180
178
  # 直接返回频谱
181
179
  spectrum = fft(audio)
182
- freqs = fftfreq(len(audio), 1/self.sr)
183
-
180
+ freqs = fftfreq(len(audio), 1 / self.sr)
181
+
184
182
  positive_idx = freqs >= 0
185
183
  freqs = freqs[positive_idx]
186
184
  spectrum = spectrum[positive_idx]
187
-
185
+
188
186
  return freqs, np.abs(spectrum)
189
-
187
+
190
188
  def loudness_range(self, audio: np.ndarray, gate_threshold: float = -70) -> dict:
191
189
  """
192
190
  计算响度范围(基于EBU R128标准的简化版本)
193
-
191
+
194
192
  Args:
195
193
  audio: 音频信号
196
194
  gate_threshold: 门限阈值(dB)
197
-
195
+
198
196
  Returns:
199
197
  响度统计信息字典
200
198
  """
201
199
  # 分块计算短时响度
202
200
  block_size = int(0.4 * self.sr) # 400ms块
203
201
  hop_size = int(0.1 * self.sr) # 100ms跳跃
204
-
202
+
205
203
  blocks = []
206
204
  for i in range(0, len(audio) - block_size, hop_size):
207
205
  block = audio[i:i + block_size]
@@ -211,31 +209,31 @@ class QualityAnalyzer:
211
209
  loudness = 20 * np.log10(rms)
212
210
  if loudness > gate_threshold:
213
211
  blocks.append(loudness)
214
-
212
+
215
213
  if len(blocks) == 0:
216
214
  return {'integrated_loudness': -float('inf'), 'loudness_range': 0, 'max_loudness': -float('inf')}
217
-
215
+
218
216
  blocks = np.array(blocks)
219
-
217
+
220
218
  # 计算统计量
221
219
  integrated_loudness = np.mean(blocks)
222
220
  loudness_range = np.percentile(blocks, 95) - np.percentile(blocks, 10)
223
221
  max_loudness = np.max(blocks)
224
-
222
+
225
223
  return {
226
224
  'integrated_loudness': integrated_loudness,
227
225
  'loudness_range': loudness_range,
228
226
  'max_loudness': max_loudness
229
227
  }
230
-
228
+
231
229
  def spectral_distortion(self, original: np.ndarray, processed: np.ndarray) -> float:
232
230
  """
233
231
  计算谱失真度
234
-
232
+
235
233
  Args:
236
234
  original: 原始音频
237
235
  processed: 处理后音频
238
-
236
+
239
237
  Returns:
240
238
  谱失真度(dB)
241
239
  """
@@ -243,48 +241,48 @@ class QualityAnalyzer:
243
241
  min_len = min(len(original), len(processed))
244
242
  original = original[:min_len]
245
243
  processed = processed[:min_len]
246
-
244
+
247
245
  # 计算频谱
248
246
  orig_spectrum = np.abs(fft(original))
249
247
  proc_spectrum = np.abs(fft(processed))
250
-
248
+
251
249
  # 计算谱失真
252
250
  mse = np.mean((orig_spectrum - proc_spectrum) ** 2)
253
251
  orig_power = np.mean(orig_spectrum ** 2)
254
-
252
+
255
253
  if orig_power == 0:
256
254
  return float('inf')
257
-
255
+
258
256
  distortion_db = 10 * np.log10(mse / orig_power)
259
257
  return distortion_db
260
258
 
261
259
 
262
- def comprehensive_quality_assessment(audio: np.ndarray, sr: int = 22050,
263
- reference: Optional[np.ndarray] = None) -> dict:
260
+ def comprehensive_quality_assessment(audio: np.ndarray, sr: int = 22050,
261
+ reference: Optional[np.ndarray] = None) -> dict:
264
262
  """
265
263
  综合质量评估
266
-
264
+
267
265
  Args:
268
266
  audio: 待评估音频
269
267
  sr: 采样率
270
268
  reference: 参考音频(可选)
271
-
269
+
272
270
  Returns:
273
271
  质量评估结果字典
274
272
  """
275
273
  analyzer = QualityAnalyzer(sr=sr)
276
-
274
+
277
275
  results = {
278
276
  'dynamic_range': analyzer.dynamic_range(audio),
279
277
  'loudness_stats': analyzer.loudness_range(audio),
280
278
  }
281
-
279
+
282
280
  # 尝试计算THD
283
281
  try:
284
282
  results['thd'] = analyzer.total_harmonic_distortion(audio)
285
283
  except:
286
284
  results['thd'] = None
287
-
285
+
288
286
  # 如果有参考音频, 计算比较指标
289
287
  if reference is not None:
290
288
  try:
@@ -293,7 +291,7 @@ def comprehensive_quality_assessment(audio: np.ndarray, sr: int = 22050,
293
291
  except:
294
292
  results['snr'] = None
295
293
  results['spectral_distortion'] = None
296
-
294
+
297
295
  # 频率响应
298
296
  try:
299
297
  freqs, response = analyzer.frequency_response(audio, reference)
@@ -303,18 +301,18 @@ def comprehensive_quality_assessment(audio: np.ndarray, sr: int = 22050,
303
301
  }
304
302
  except:
305
303
  results['frequency_response'] = None
306
-
304
+
307
305
  return results
308
306
 
309
307
 
310
308
  def audio_health_check(audio: np.ndarray, sr: int = 22050) -> dict:
311
309
  """
312
310
  音频健康检查
313
-
311
+
314
312
  Args:
315
313
  audio: 音频信号
316
314
  sr: 采样率
317
-
315
+
318
316
  Returns:
319
317
  健康检查结果
320
318
  """
@@ -323,36 +321,36 @@ def audio_health_check(audio: np.ndarray, sr: int = 22050) -> dict:
323
321
  'warnings': [],
324
322
  'stats': {}
325
323
  }
326
-
324
+
327
325
  # 基础统计
328
326
  max_amplitude = np.max(np.abs(audio))
329
327
  min_amplitude = np.min(np.abs(audio))
330
328
  mean_amplitude = np.mean(np.abs(audio))
331
-
329
+
332
330
  health_report['stats'] = {
333
331
  'max_amplitude': max_amplitude,
334
332
  'min_amplitude': min_amplitude,
335
333
  'mean_amplitude': mean_amplitude,
336
334
  'duration': len(audio) / sr
337
335
  }
338
-
336
+
339
337
  # 检查削波
340
338
  if max_amplitude >= 0.99:
341
339
  health_report['issues'].append('Potential clipping detected')
342
-
340
+
343
341
  # 检查过低音量
344
342
  if max_amplitude < 0.01:
345
343
  health_report['warnings'].append('Very low signal level')
346
-
344
+
347
345
  # 检查静音
348
346
  if mean_amplitude < 1e-6:
349
347
  health_report['issues'].append('Signal appears to be silent')
350
-
348
+
351
349
  # 检查DC偏移
352
350
  dc_offset = np.mean(audio)
353
351
  if abs(dc_offset) > 0.01:
354
352
  health_report['warnings'].append(f'DC offset detected: {dc_offset:.4f}')
355
-
353
+
356
354
  # 检查动态范围
357
355
  analyzer = QualityAnalyzer(sr=sr)
358
356
  dynamic_range = analyzer.dynamic_range(audio)
@@ -360,5 +358,5 @@ def audio_health_check(audio: np.ndarray, sr: int = 22050) -> dict:
360
358
  health_report['warnings'].append('Low dynamic range')
361
359
  elif dynamic_range > 60:
362
360
  health_report['warnings'].append('Very high dynamic range - check for noise')
363
-
364
- return health_report
361
+
362
+ return health_report
@@ -1,32 +1,13 @@
1
1
  '''
2
2
  Author: 凌逆战 | Never
3
3
  Date: 2025-03-26 22:13:22
4
- Description:
4
+ Description: 统计音频语音段rms值分布
5
5
  '''
6
- # -*- coding:utf-8 -*-
7
- # Author:凌逆战 | Never
8
- # Date: 2025/3/2
9
- """
10
- 统计音频语音段rms值分布
11
- """
12
- import sys
13
- sys.path.append("../../../")
14
- import torch
15
6
  import soundfile as sf
7
+ from .temporal_features import rms_amplitude
16
8
  from neverlib.utils import get_path_list
17
- from neverlib.filter import HPFilter
18
- from neverlib.audio_aug import volume_norm
19
- from neverlib.dataAnalyze.utils import rms_amplitude
20
- from joblib import Parallel, delayed
21
- import matplotlib.pyplot as plt
22
- import numpy as np
23
- import librosa
24
- import os
25
- from utils.train_utils import from_path_get_vadpoint
26
9
 
27
10
 
28
-
29
-
30
11
  def get_rms_vad(wav_path):
31
12
  wav, wav_sr = sf.read(wav_path, always_2d=True) # (xxx,ch)
32
13
  assert wav_sr == sr, f"期望采样率为{sr}, 但是为{wav_sr}, 文件名: {wav_path}"
@@ -39,24 +20,24 @@ def get_rms_vad(wav_path):
39
20
  return rms
40
21
 
41
22
 
42
- sr = 16000
43
- wav_dir_list = [
44
- "/data/never/Dataset/kws_data/Command_Word/Crowdsourcing/en_kws2/train/RealPerson",
45
- "/data/never/Dataset/kws_data/Command_Word/Crowdsourcing/en_kws2/val/RealPerson",
46
- "/data/never/Dataset/kws_data/Command_Word/Crowdsourcing/en_kws2/test/RealPerson",
47
- ]
48
- wav_path_list = []
49
- for wav_dir in wav_dir_list:
50
- wav_path_list.extend(get_path_list(wav_dir, end="*.wav"))
51
-
52
- rms_list = Parallel(n_jobs=64)(delayed(get_rms_vad)(wav_path) for wav_path in wav_path_list)
53
-
54
- # 绘制时长分布直方图
55
- plt.hist(rms_list, bins=100, edgecolor='black')
56
- plt.title("RMS Distribution")
57
- plt.xlabel("RMS (dB)")
58
- plt.ylabel("number")
59
- plt.grid(True)
60
- plt.tight_layout()
61
- plt.savefig("./png_dist/rms_distribution.png")
62
-
23
+ if __name__ == "__main__":
24
+ sr = 16000
25
+ wav_dir_list = [
26
+ "/data/never/Dataset/kws_data/Command_Word/Crowdsourcing/en_kws2/train/RealPerson",
27
+ "/data/never/Dataset/kws_data/Command_Word/Crowdsourcing/en_kws2/val/RealPerson",
28
+ "/data/never/Dataset/kws_data/Command_Word/Crowdsourcing/en_kws2/test/RealPerson",
29
+ ]
30
+ wav_path_list = []
31
+ for wav_dir in wav_dir_list:
32
+ wav_path_list.extend(get_path_list(wav_dir, end="*.wav"))
33
+
34
+ rms_list = Parallel(n_jobs=64)(delayed(get_rms_vad)(wav_path) for wav_path in wav_path_list)
35
+
36
+ # 绘制时长分布直方图
37
+ plt.hist(rms_list, bins=100, edgecolor='black')
38
+ plt.title("RMS Distribution")
39
+ plt.xlabel("RMS (dB)")
40
+ plt.ylabel("number")
41
+ plt.grid(True)
42
+ plt.tight_layout()
43
+ plt.savefig("./png_dist/rms_distribution.png")