neverlib 0.2.6__py3-none-any.whl → 0.2.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82):
  1. neverlib/.claude/settings.local.json +9 -0
  2. neverlib/Docs/audio_aug/test_volume.ipynb +416 -0
  3. neverlib/Docs/audio_aug_test/test_volume.ipynb +289 -0
  4. neverlib/Docs/filter/biquad.ipynb +129 -0
  5. neverlib/Docs/filter/filter_family.ipynb +450 -0
  6. neverlib/Docs/filter/highpass.ipynb +139 -0
  7. neverlib/Docs/filter/scipy_filter_family.ipynb +110 -0
  8. neverlib/Docs/vad/VAD_Energy.ipynb +167 -0
  9. neverlib/Docs/vad/VAD_Silero.ipynb +325 -0
  10. neverlib/Docs/vad/VAD_WebRTC.ipynb +189 -0
  11. neverlib/Docs/vad/VAD_funasr.ipynb +192 -0
  12. neverlib/Docs/vad/VAD_rvADfast.ipynb +162 -0
  13. neverlib/Docs/vad/VAD_statistics.ipynb +532 -0
  14. neverlib/Docs/vad/VAD_tenVAD.ipynb +292 -0
  15. neverlib/Docs/vad/VAD_vadlib.ipynb +168 -0
  16. neverlib/Docs/vad/VAD_whisper.ipynb +404 -0
  17. neverlib/QA/gen_init.py +218 -0
  18. neverlib/QA/get_fun.py +19 -0
  19. neverlib/__init__.py +40 -4
  20. neverlib/audio_aug/HarmonicDistortion.py +19 -13
  21. neverlib/audio_aug/__init__.py +82 -12
  22. neverlib/audio_aug/audio_aug.py +19 -14
  23. neverlib/audio_aug/clip_aug.py +15 -18
  24. neverlib/audio_aug/coder_aug.py +44 -24
  25. neverlib/audio_aug/coder_aug2.py +54 -37
  26. neverlib/audio_aug/loss_packet_aug.py +7 -7
  27. neverlib/audio_aug/quant_aug.py +19 -17
  28. neverlib/data/000_short_enhance.wav +0 -0
  29. neverlib/data/3956_speech.wav +0 -0
  30. neverlib/data/3956_sweep.wav +0 -0
  31. neverlib/data/vad_example.wav +0 -0
  32. neverlib/data/white.wav +0 -0
  33. neverlib/data/white_EQ.wav +0 -0
  34. neverlib/data/white_matched.wav +0 -0
  35. neverlib/data_analyze/__init__.py +69 -20
  36. neverlib/data_analyze/dataset_analyzer.py +109 -114
  37. neverlib/data_analyze/quality_metrics.py +87 -89
  38. neverlib/data_analyze/rms_distrubution.py +23 -42
  39. neverlib/data_analyze/spectral_analysis.py +43 -46
  40. neverlib/data_analyze/statistics.py +76 -76
  41. neverlib/data_analyze/temporal_features.py +15 -6
  42. neverlib/data_analyze/visualization.py +208 -144
  43. neverlib/filter/__init__.py +40 -20
  44. neverlib/filter/auto_eq/__init__.py +50 -31
  45. neverlib/filter/auto_eq/de_eq.py +0 -2
  46. neverlib/filter/common.py +24 -5
  47. neverlib/metrics/DNSMOS/bak_ovr.onnx +0 -0
  48. neverlib/metrics/DNSMOS/model_v8.onnx +0 -0
  49. neverlib/metrics/DNSMOS/sig.onnx +0 -0
  50. neverlib/metrics/DNSMOS/sig_bak_ovr.onnx +0 -0
  51. neverlib/metrics/__init__.py +59 -0
  52. neverlib/metrics/dnsmos.py +4 -15
  53. neverlib/metrics/pDNSMOS/sig_bak_ovr.onnx +0 -0
  54. neverlib/metrics/pesq_c/PESQ +0 -0
  55. neverlib/metrics/pesq_c/dsp.c +553 -0
  56. neverlib/metrics/pesq_c/dsp.h +138 -0
  57. neverlib/metrics/pesq_c/pesq.h +294 -0
  58. neverlib/metrics/pesq_c/pesqdsp.c +1047 -0
  59. neverlib/metrics/pesq_c/pesqio.c +392 -0
  60. neverlib/metrics/pesq_c/pesqmain.c +610 -0
  61. neverlib/metrics/pesq_c/pesqmod.c +1417 -0
  62. neverlib/metrics/pesq_c/pesqpar.h +297 -0
  63. neverlib/metrics/snr.py +5 -1
  64. neverlib/metrics/spec.py +31 -21
  65. neverlib/metrics/test_pesq.py +0 -4
  66. neverlib/tests/__init__.py +33 -1
  67. neverlib/tests/test_imports.py +19 -0
  68. neverlib/utils/__init__.py +71 -15
  69. neverlib/utils/audio_split.py +6 -1
  70. neverlib/utils/checkGPU.py +17 -9
  71. neverlib/utils/lazy_expose.py +29 -0
  72. neverlib/utils/utils.py +55 -12
  73. neverlib/vad/PreProcess.py +66 -66
  74. neverlib/vad/__init__.py +71 -25
  75. neverlib/vad/class_get_speech.py +1 -1
  76. neverlib/vad/class_vad.py +3 -3
  77. neverlib/vad/img.png +0 -0
  78. {neverlib-0.2.6.dist-info → neverlib-0.2.8.dist-info}/METADATA +1 -1
  79. {neverlib-0.2.6.dist-info → neverlib-0.2.8.dist-info}/RECORD +82 -39
  80. {neverlib-0.2.6.dist-info → neverlib-0.2.8.dist-info}/WHEEL +0 -0
  81. {neverlib-0.2.6.dist-info → neverlib-0.2.8.dist-info}/licenses/LICENSE +0 -0
  82. {neverlib-0.2.6.dist-info → neverlib-0.2.8.dist-info}/top_level.txt +0 -0
@@ -4,22 +4,19 @@ Spectral Analysis Module
4
4
 
5
5
  提供音频频域特征提取和分析功能
6
6
  """
7
-
8
- import numpy as np
9
7
  import librosa
10
- import scipy.signal
8
+ import numpy as np
11
9
  from scipy.fft import fft, fftfreq
12
10
  from typing import Tuple, Optional, Union
13
- import warnings
14
11
 
15
12
 
16
13
  class SpectralAnalyzer:
17
14
  """频谱分析器类"""
18
-
15
+
19
16
  def __init__(self, sr: int = 22050, n_fft: int = 2048, hop_length: int = 512):
20
17
  """
21
18
  初始化频谱分析器
22
-
19
+
23
20
  Args:
24
21
  sr: 采样率
25
22
  n_fft: FFT窗口大小
@@ -28,132 +25,132 @@ class SpectralAnalyzer:
28
25
  self.sr = sr
29
26
  self.n_fft = n_fft
30
27
  self.hop_length = hop_length
31
-
28
+
32
29
  def compute_stft(self, audio: np.ndarray) -> np.ndarray:
33
30
  """
34
31
  计算短时傅里叶变换
35
-
32
+
36
33
  Args:
37
34
  audio: 音频信号
38
-
35
+
39
36
  Returns:
40
37
  STFT结果
41
38
  """
42
39
  return librosa.stft(audio, n_fft=self.n_fft, hop_length=self.hop_length)
43
-
40
+
44
41
  def compute_magnitude_spectrum(self, audio: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
45
42
  """
46
43
  计算幅度谱
47
-
44
+
48
45
  Args:
49
46
  audio: 音频信号
50
-
47
+
51
48
  Returns:
52
49
  频率轴, 幅度谱
53
50
  """
54
51
  spectrum = fft(audio)
55
52
  magnitude = np.abs(spectrum)
56
- freqs = fftfreq(len(audio), 1/self.sr)
57
-
53
+ freqs = fftfreq(len(audio), 1 / self.sr)
54
+
58
55
  # 只返回正频率部分
59
56
  positive_freq_idx = freqs >= 0
60
57
  return freqs[positive_freq_idx], magnitude[positive_freq_idx]
61
-
58
+
62
59
  def spectral_centroid(self, audio: np.ndarray) -> np.ndarray:
63
60
  """
64
61
  计算谱重心
65
-
62
+
66
63
  Args:
67
64
  audio: 音频信号
68
-
65
+
69
66
  Returns:
70
67
  谱重心数组
71
68
  """
72
69
  return librosa.feature.spectral_centroid(
73
70
  y=audio, sr=self.sr, hop_length=self.hop_length
74
71
  )[0]
75
-
72
+
76
73
  def spectral_rolloff(self, audio: np.ndarray, roll_percent: float = 0.85) -> np.ndarray:
77
74
  """
78
75
  计算谱滚降
79
-
76
+
80
77
  Args:
81
78
  audio: 音频信号
82
79
  roll_percent: 滚降百分比
83
-
80
+
84
81
  Returns:
85
82
  谱滚降数组
86
83
  """
87
84
  return librosa.feature.spectral_rolloff(
88
85
  y=audio, sr=self.sr, hop_length=self.hop_length, roll_percent=roll_percent
89
86
  )[0]
90
-
87
+
91
88
  def spectral_flatness(self, audio: np.ndarray) -> np.ndarray:
92
89
  """
93
90
  计算谱平坦度
94
-
91
+
95
92
  Args:
96
93
  audio: 音频信号
97
-
94
+
98
95
  Returns:
99
96
  谱平坦度数组
100
97
  """
101
98
  return librosa.feature.spectral_flatness(
102
99
  y=audio, hop_length=self.hop_length
103
100
  )[0]
104
-
101
+
105
102
  def spectral_contrast(self, audio: np.ndarray, n_bands: int = 6) -> np.ndarray:
106
103
  """
107
104
  计算谱对比度
108
-
105
+
109
106
  Args:
110
107
  audio: 音频信号
111
108
  n_bands: 频段数量
112
-
109
+
113
110
  Returns:
114
111
  谱对比度矩阵
115
112
  """
116
113
  return librosa.feature.spectral_contrast(
117
114
  y=audio, sr=self.sr, hop_length=self.hop_length, n_bands=n_bands
118
115
  )
119
-
116
+
120
117
  def mfcc_features(self, audio: np.ndarray, n_mfcc: int = 13) -> np.ndarray:
121
118
  """
122
119
  提取MFCC特征
123
-
120
+
124
121
  Args:
125
122
  audio: 音频信号
126
123
  n_mfcc: MFCC系数数量
127
-
124
+
128
125
  Returns:
129
126
  MFCC特征矩阵
130
127
  """
131
128
  return librosa.feature.mfcc(
132
129
  y=audio, sr=self.sr, n_mfcc=n_mfcc, hop_length=self.hop_length
133
130
  )
134
-
131
+
135
132
  def mel_spectrogram(self, audio: np.ndarray, n_mels: int = 128) -> np.ndarray:
136
133
  """
137
134
  计算梅尔频谱图
138
-
135
+
139
136
  Args:
140
137
  audio: 音频信号
141
138
  n_mels: 梅尔滤波器组数量
142
-
139
+
143
140
  Returns:
144
141
  梅尔频谱图
145
142
  """
146
143
  return librosa.feature.melspectrogram(
147
144
  y=audio, sr=self.sr, n_mels=n_mels, hop_length=self.hop_length
148
145
  )
149
-
146
+
150
147
  def chroma_features(self, audio: np.ndarray) -> np.ndarray:
151
148
  """
152
149
  提取色度特征
153
-
150
+
154
151
  Args:
155
152
  audio: 音频信号
156
-
153
+
157
154
  Returns:
158
155
  色度特征矩阵
159
156
  """
@@ -165,16 +162,16 @@ class SpectralAnalyzer:
165
162
  def compute_spectral_features(audio: np.ndarray, sr: int = 22050) -> dict:
166
163
  """
167
164
  计算完整的频域特征集合
168
-
165
+
169
166
  Args:
170
167
  audio: 音频信号
171
168
  sr: 采样率
172
-
169
+
173
170
  Returns:
174
171
  包含各种频域特征的字典
175
172
  """
176
173
  analyzer = SpectralAnalyzer(sr=sr)
177
-
174
+
178
175
  features = {
179
176
  'spectral_centroid': analyzer.spectral_centroid(audio),
180
177
  'spectral_rolloff': analyzer.spectral_rolloff(audio),
@@ -184,27 +181,27 @@ def compute_spectral_features(audio: np.ndarray, sr: int = 22050) -> dict:
184
181
  'mel_spectrogram': analyzer.mel_spectrogram(audio),
185
182
  'chroma': analyzer.chroma_features(audio)
186
183
  }
187
-
184
+
188
185
  return features
189
186
 
190
187
 
191
188
  def frequency_domain_stats(audio: np.ndarray, sr: int = 22050) -> dict:
192
189
  """
193
190
  计算频域统计信息
194
-
191
+
195
192
  Args:
196
193
  audio: 音频信号
197
194
  sr: 采样率
198
-
195
+
199
196
  Returns:
200
197
  频域统计信息字典
201
198
  """
202
199
  analyzer = SpectralAnalyzer(sr=sr)
203
200
  freqs, magnitude = analyzer.compute_magnitude_spectrum(audio)
204
-
201
+
205
202
  # 计算功率谱密度
206
203
  power = magnitude ** 2
207
-
204
+
208
205
  # 计算统计量
209
206
  stats = {
210
207
  'mean_frequency': np.average(freqs, weights=power),
@@ -212,7 +209,7 @@ def frequency_domain_stats(audio: np.ndarray, sr: int = 22050) -> dict:
212
209
  'peak_frequency': freqs[np.argmax(magnitude)],
213
210
  'bandwidth': freqs[np.where(power > 0.5 * np.max(power))][-1] - freqs[np.where(power > 0.5 * np.max(power))][0],
214
211
  'spectral_energy': np.sum(power),
215
- 'spectral_entropy': -np.sum((power/np.sum(power)) * np.log2(power/np.sum(power) + 1e-10))
212
+ 'spectral_entropy': -np.sum((power / np.sum(power)) * np.log2(power / np.sum(power) + 1e-10))
216
213
  }
217
-
218
- return stats
214
+
215
+ return stats
@@ -4,25 +4,20 @@ Statistics Analysis Module
4
4
 
5
5
  提供音频数据集统计分析功能
6
6
  """
7
-
7
+ import json
8
8
  import numpy as np
9
- import librosa
10
- import os
11
9
  from pathlib import Path
12
- from typing import List, Dict, Tuple, Optional, Union
13
- import matplotlib.pyplot as plt
14
- from collections import defaultdict
15
- import json
16
- from .utils import rms_amplitude, dB
10
+ from typing import List, Dict, Tuple, Optional
11
+ from .temporal_features import rms_amplitude, dB
17
12
 
18
13
 
19
14
  class AudioStatistics:
20
15
  """音频统计分析类"""
21
-
16
+
22
17
  def __init__(self, sr: int = 22050):
23
18
  """
24
19
  初始化统计分析器
25
-
20
+
26
21
  Args:
27
22
  sr: 采样率
28
23
  """
@@ -30,53 +25,58 @@ class AudioStatistics:
30
25
  self.audio_data = []
31
26
  self.file_paths = []
32
27
  self.statistics = {}
33
-
28
+
34
29
  def add_audio_file(self, file_path: str, audio_data: Optional[np.ndarray] = None):
35
30
  """
36
31
  添加音频文件到分析列表
37
-
32
+
38
33
  Args:
39
34
  file_path: 音频文件路径
40
35
  audio_data: 音频数据(如果不提供则从文件加载)
41
36
  """
37
+ try:
38
+ import librosa
39
+ except Exception as e:
40
+ raise ImportError("需要安装 librosa 才能使用 add_audio_file: pip install librosa") from e
41
+
42
42
  if audio_data is None:
43
43
  try:
44
44
  audio_data, _ = librosa.load(file_path, sr=self.sr)
45
45
  except Exception as e:
46
46
  print(f"Error loading {file_path}: {e}")
47
47
  return
48
-
48
+
49
49
  self.audio_data.append(audio_data)
50
50
  self.file_paths.append(file_path)
51
-
51
+
52
52
  def add_audio_directory(self, directory: str, extensions: List[str] = None):
53
53
  """
54
54
  批量添加目录中的音频文件
55
-
55
+
56
56
  Args:
57
57
  directory: 音频文件目录
58
58
  extensions: 支持的文件扩展名
59
59
  """
60
60
  if extensions is None:
61
61
  extensions = ['.wav', '.mp3', '.flac', '.m4a', '.aac']
62
-
62
+
63
63
  directory = Path(directory)
64
64
  for ext in extensions:
65
65
  for file_path in directory.glob(f'*{ext}'):
66
66
  self.add_audio_file(str(file_path))
67
-
67
+
68
68
  def compute_duration_statistics(self) -> Dict:
69
69
  """
70
70
  计算音频时长统计
71
-
71
+
72
72
  Returns:
73
73
  时长统计信息
74
74
  """
75
75
  durations = [len(audio) / self.sr for audio in self.audio_data]
76
-
76
+
77
77
  if not durations:
78
78
  return {}
79
-
79
+
80
80
  stats = {
81
81
  'count': len(durations),
82
82
  'total_duration': sum(durations),
@@ -92,30 +92,30 @@ class AudioStatistics:
92
92
  '95th': np.percentile(durations, 95)
93
93
  }
94
94
  }
95
-
95
+
96
96
  return stats
97
-
97
+
98
98
  def compute_amplitude_statistics(self) -> Dict:
99
99
  """
100
100
  计算幅度统计
101
-
101
+
102
102
  Returns:
103
103
  幅度统计信息
104
104
  """
105
105
  all_amplitudes = []
106
106
  max_amplitudes = []
107
107
  rms_values = []
108
-
108
+
109
109
  for audio in self.audio_data:
110
110
  all_amplitudes.extend(np.abs(audio).tolist())
111
111
  max_amplitudes.append(np.max(np.abs(audio)))
112
112
  rms_values.append(rms_amplitude(audio))
113
-
113
+
114
114
  if not all_amplitudes:
115
115
  return {}
116
-
116
+
117
117
  all_amplitudes = np.array(all_amplitudes)
118
-
118
+
119
119
  stats = {
120
120
  'overall': {
121
121
  'mean': np.mean(all_amplitudes),
@@ -144,33 +144,33 @@ class AudioStatistics:
144
144
  'std_db': np.std([dB(rms) for rms in rms_values])
145
145
  }
146
146
  }
147
-
147
+
148
148
  return stats
149
-
149
+
150
150
  def compute_frequency_statistics(self) -> Dict:
151
151
  """
152
152
  计算频域统计
153
-
153
+
154
154
  Returns:
155
155
  频域统计信息
156
156
  """
157
157
  spectral_centroids = []
158
158
  spectral_bandwidths = []
159
159
  spectral_rolloffs = []
160
-
160
+
161
161
  for audio in self.audio_data:
162
162
  # 计算频谱特征
163
163
  centroid = librosa.feature.spectral_centroid(y=audio, sr=self.sr)[0]
164
164
  bandwidth = librosa.feature.spectral_bandwidth(y=audio, sr=self.sr)[0]
165
165
  rolloff = librosa.feature.spectral_rolloff(y=audio, sr=self.sr)[0]
166
-
166
+
167
167
  spectral_centroids.extend(centroid.tolist())
168
168
  spectral_bandwidths.extend(bandwidth.tolist())
169
169
  spectral_rolloffs.extend(rolloff.tolist())
170
-
170
+
171
171
  if not spectral_centroids:
172
172
  return {}
173
-
173
+
174
174
  stats = {
175
175
  'spectral_centroid': {
176
176
  'mean': np.mean(spectral_centroids),
@@ -191,17 +191,17 @@ class AudioStatistics:
191
191
  'max': np.max(spectral_rolloffs)
192
192
  }
193
193
  }
194
-
194
+
195
195
  return stats
196
-
196
+
197
197
  def detect_outliers(self, feature: str = 'duration', threshold: float = 2.0) -> List[Tuple[str, float]]:
198
198
  """
199
199
  检测异常值
200
-
200
+
201
201
  Args:
202
202
  feature: 要检测的特征 ('duration', 'max_amplitude', 'rms')
203
203
  threshold: Z-score阈值
204
-
204
+
205
205
  Returns:
206
206
  异常文件列表 [(文件路径, 特征值)]
207
207
  """
@@ -213,23 +213,23 @@ class AudioStatistics:
213
213
  values = [rms_amplitude(audio) for audio in self.audio_data]
214
214
  else:
215
215
  raise ValueError(f"Unknown feature: {feature}")
216
-
216
+
217
217
  values = np.array(values)
218
218
  mean_val = np.mean(values)
219
219
  std_val = np.std(values)
220
-
220
+
221
221
  outliers = []
222
222
  for i, (path, val) in enumerate(zip(self.file_paths, values)):
223
223
  z_score = abs(val - mean_val) / (std_val + 1e-10)
224
224
  if z_score > threshold:
225
225
  outliers.append((path, val))
226
-
226
+
227
227
  return outliers
228
-
228
+
229
229
  def generate_distribution_analysis(self) -> Dict:
230
230
  """
231
231
  生成分布分析
232
-
232
+
233
233
  Returns:
234
234
  分布分析结果
235
235
  """
@@ -238,34 +238,34 @@ class AudioStatistics:
238
238
  'amplitude_distribution': self._analyze_distribution([np.max(np.abs(audio)) for audio in self.audio_data]),
239
239
  'rms_distribution': self._analyze_distribution([rms_amplitude(audio) for audio in self.audio_data])
240
240
  }
241
-
241
+
242
242
  return analysis
243
-
243
+
244
244
  def _analyze_distribution(self, values: List[float]) -> Dict:
245
245
  """
246
246
  分析数值分布
247
-
247
+
248
248
  Args:
249
249
  values: 数值列表
250
-
250
+
251
251
  Returns:
252
252
  分布分析结果
253
253
  """
254
254
  if not values:
255
255
  return {}
256
-
256
+
257
257
  values = np.array(values)
258
-
258
+
259
259
  # 计算偏度和峰度
260
260
  mean_val = np.mean(values)
261
261
  std_val = np.std(values)
262
-
262
+
263
263
  # 偏度 (skewness)
264
264
  skewness = np.mean(((values - mean_val) / (std_val + 1e-10)) ** 3)
265
-
265
+
266
266
  # 峰度 (kurtosis)
267
267
  kurtosis = np.mean(((values - mean_val) / (std_val + 1e-10)) ** 4) - 3
268
-
268
+
269
269
  return {
270
270
  'mean': mean_val,
271
271
  'std': std_val,
@@ -273,15 +273,15 @@ class AudioStatistics:
273
273
  'kurtosis': kurtosis,
274
274
  'distribution_type': self._classify_distribution(skewness, kurtosis)
275
275
  }
276
-
276
+
277
277
  def _classify_distribution(self, skewness: float, kurtosis: float) -> str:
278
278
  """
279
279
  分类分布类型
280
-
280
+
281
281
  Args:
282
282
  skewness: 偏度
283
283
  kurtosis: 峰度
284
-
284
+
285
285
  Returns:
286
286
  分布类型描述
287
287
  """
@@ -297,11 +297,11 @@ class AudioStatistics:
297
297
  return "light_tailed"
298
298
  else:
299
299
  return "unknown"
300
-
300
+
301
301
  def compute_all_statistics(self) -> Dict:
302
302
  """
303
303
  计算所有统计信息
304
-
304
+
305
305
  Returns:
306
306
  完整统计报告
307
307
  """
@@ -318,13 +318,13 @@ class AudioStatistics:
318
318
  'rms': self.detect_outliers('rms')
319
319
  }
320
320
  }
321
-
321
+
322
322
  return self.statistics
323
-
323
+
324
324
  def export_statistics(self, output_path: str):
325
325
  """
326
326
  导出统计结果到JSON文件
327
-
327
+
328
328
  Args:
329
329
  output_path: 输出文件路径
330
330
  """
@@ -342,9 +342,9 @@ class AudioStatistics:
342
342
  return [convert_numpy(item) for item in obj]
343
343
  else:
344
344
  return obj
345
-
345
+
346
346
  stats_json = convert_numpy(self.statistics)
347
-
347
+
348
348
  with open(output_path, 'w', encoding='utf-8') as f:
349
349
  json.dump(stats_json, f, indent=2, ensure_ascii=False)
350
350
 
@@ -352,47 +352,47 @@ class AudioStatistics:
352
352
  def quick_audio_stats(file_paths: List[str], sr: int = 22050) -> Dict:
353
353
  """
354
354
  快速音频统计分析
355
-
355
+
356
356
  Args:
357
357
  file_paths: 音频文件路径列表
358
358
  sr: 采样率
359
-
359
+
360
360
  Returns:
361
361
  统计结果
362
362
  """
363
363
  analyzer = AudioStatistics(sr=sr)
364
-
364
+
365
365
  for file_path in file_paths:
366
366
  analyzer.add_audio_file(file_path)
367
-
367
+
368
368
  return analyzer.compute_all_statistics()
369
369
 
370
370
 
371
- def compare_datasets(dataset1_paths: List[str], dataset2_paths: List[str],
372
- sr: int = 22050) -> Dict:
371
+ def compare_datasets(dataset1_paths: List[str], dataset2_paths: List[str],
372
+ sr: int = 22050) -> Dict:
373
373
  """
374
374
  比较两个数据集
375
-
375
+
376
376
  Args:
377
377
  dataset1_paths: 数据集1文件路径
378
378
  dataset2_paths: 数据集2文件路径
379
379
  sr: 采样率
380
-
380
+
381
381
  Returns:
382
382
  比较结果
383
383
  """
384
384
  analyzer1 = AudioStatistics(sr=sr)
385
385
  analyzer2 = AudioStatistics(sr=sr)
386
-
386
+
387
387
  for path in dataset1_paths:
388
388
  analyzer1.add_audio_file(path)
389
-
389
+
390
390
  for path in dataset2_paths:
391
391
  analyzer2.add_audio_file(path)
392
-
392
+
393
393
  stats1 = analyzer1.compute_all_statistics()
394
394
  stats2 = analyzer2.compute_all_statistics()
395
-
395
+
396
396
  comparison = {
397
397
  'dataset1': stats1,
398
398
  'dataset2': stats2,
@@ -402,5 +402,5 @@ def compare_datasets(dataset1_paths: List[str], dataset2_paths: List[str],
402
402
  'mean_rms_diff': stats2['amplitude_stats']['rms_values']['mean'] - stats1['amplitude_stats']['rms_values']['mean']
403
403
  }
404
404
  }
405
-
406
- return comparison
405
+
406
+ return comparison
@@ -7,13 +7,7 @@ Temporal Features Analysis Module
7
7
 
8
8
  提供音频时域特征提取和分析功能
9
9
  '''
10
-
11
- import warnings
12
- from typing import Tuple, Optional, Union
13
- from scipy import signal
14
10
  import numpy as np
15
- import librosa
16
- # from neverlib.utils.utils import dB
17
11
 
18
12
 
19
13
  def dB(level):
@@ -42,6 +36,11 @@ def rms_amplitude(wav, frame_length=512, hop_length=256):
42
36
  :param wav: (*, ch)
43
37
  :return: (frame_num,)
44
38
  """
39
+ try:
40
+ import librosa
41
+ except Exception as e:
42
+ raise ImportError("需要安装 librosa 才能使用 rms_amplitude: pip install librosa") from e
43
+
45
44
  # 分帧
46
45
  frame = librosa.util.frame(wav.flatten(), frame_length=frame_length, hop_length=hop_length) # (frame_length, frame_num)
47
46
  rms_amp = np.sqrt(np.mean(frame**2, axis=0)) # (frame_num,)
@@ -82,6 +81,11 @@ def zero_crossing_rate(self, audio: np.ndarray) -> np.ndarray:
82
81
  Returns:
83
82
  过零率数组
84
83
  """
84
+ try:
85
+ import librosa
86
+ except Exception as e:
87
+ raise ImportError("需要安装 librosa 才能使用 zero_crossing_rate: pip install librosa") from e
88
+
85
89
  return librosa.feature.zero_crossing_rate(
86
90
  audio, frame_length=self.frame_length, hop_length=self.hop_length
87
91
  )[0]
@@ -97,6 +101,11 @@ def short_time_energy(self, audio: np.ndarray) -> np.ndarray:
97
101
  Returns:
98
102
  短时能量数组
99
103
  """
104
+ try:
105
+ import librosa
106
+ except Exception as e:
107
+ raise ImportError("需要安装 librosa 才能使用 short_time_energy: pip install librosa") from e
108
+
100
109
  # 分帧
101
110
  frames = librosa.util.frame(
102
111
  audio, frame_length=self.frame_length, hop_length=self.hop_length