neverlib 0.2.6__py3-none-any.whl → 0.2.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. neverlib/.claude/settings.local.json +9 -0
  2. neverlib/Docs/audio_aug/test_volume.ipynb +416 -0
  3. neverlib/Docs/audio_aug_test/test_volume.ipynb +289 -0
  4. neverlib/Docs/filter/biquad.ipynb +129 -0
  5. neverlib/Docs/filter/filter_family.ipynb +450 -0
  6. neverlib/Docs/filter/highpass.ipynb +139 -0
  7. neverlib/Docs/filter/scipy_filter_family.ipynb +110 -0
  8. neverlib/Docs/vad/VAD_Energy.ipynb +167 -0
  9. neverlib/Docs/vad/VAD_Silero.ipynb +325 -0
  10. neverlib/Docs/vad/VAD_WebRTC.ipynb +189 -0
  11. neverlib/Docs/vad/VAD_funasr.ipynb +192 -0
  12. neverlib/Docs/vad/VAD_rvADfast.ipynb +162 -0
  13. neverlib/Docs/vad/VAD_statistics.ipynb +532 -0
  14. neverlib/Docs/vad/VAD_tenVAD.ipynb +292 -0
  15. neverlib/Docs/vad/VAD_vadlib.ipynb +168 -0
  16. neverlib/Docs/vad/VAD_whisper.ipynb +404 -0
  17. neverlib/QA/gen_init.py +117 -0
  18. neverlib/QA/get_fun.py +19 -0
  19. neverlib/__init__.py +21 -4
  20. neverlib/audio_aug/HarmonicDistortion.py +19 -13
  21. neverlib/audio_aug/__init__.py +30 -12
  22. neverlib/audio_aug/audio_aug.py +19 -14
  23. neverlib/audio_aug/clip_aug.py +15 -18
  24. neverlib/audio_aug/coder_aug.py +44 -24
  25. neverlib/audio_aug/coder_aug2.py +54 -37
  26. neverlib/audio_aug/loss_packet_aug.py +7 -7
  27. neverlib/audio_aug/quant_aug.py +19 -17
  28. neverlib/data/000_short_enhance.wav +0 -0
  29. neverlib/data/3956_speech.wav +0 -0
  30. neverlib/data/3956_sweep.wav +0 -0
  31. neverlib/data/vad_example.wav +0 -0
  32. neverlib/data/white.wav +0 -0
  33. neverlib/data/white_EQ.wav +0 -0
  34. neverlib/data/white_matched.wav +0 -0
  35. neverlib/data_analyze/__init__.py +25 -20
  36. neverlib/data_analyze/dataset_analyzer.py +109 -114
  37. neverlib/data_analyze/quality_metrics.py +87 -89
  38. neverlib/data_analyze/rms_distrubution.py +23 -42
  39. neverlib/data_analyze/spectral_analysis.py +43 -46
  40. neverlib/data_analyze/statistics.py +76 -76
  41. neverlib/data_analyze/temporal_features.py +15 -6
  42. neverlib/data_analyze/visualization.py +208 -144
  43. neverlib/filter/__init__.py +17 -20
  44. neverlib/filter/auto_eq/__init__.py +18 -35
  45. neverlib/filter/auto_eq/de_eq.py +0 -2
  46. neverlib/filter/common.py +24 -5
  47. neverlib/metrics/DNSMOS/bak_ovr.onnx +0 -0
  48. neverlib/metrics/DNSMOS/model_v8.onnx +0 -0
  49. neverlib/metrics/DNSMOS/sig.onnx +0 -0
  50. neverlib/metrics/DNSMOS/sig_bak_ovr.onnx +0 -0
  51. neverlib/metrics/__init__.py +23 -0
  52. neverlib/metrics/dnsmos.py +4 -15
  53. neverlib/metrics/pDNSMOS/sig_bak_ovr.onnx +0 -0
  54. neverlib/metrics/pesq_c/PESQ +0 -0
  55. neverlib/metrics/pesq_c/dsp.c +553 -0
  56. neverlib/metrics/pesq_c/dsp.h +138 -0
  57. neverlib/metrics/pesq_c/pesq.h +294 -0
  58. neverlib/metrics/pesq_c/pesqdsp.c +1047 -0
  59. neverlib/metrics/pesq_c/pesqio.c +392 -0
  60. neverlib/metrics/pesq_c/pesqmain.c +610 -0
  61. neverlib/metrics/pesq_c/pesqmod.c +1417 -0
  62. neverlib/metrics/pesq_c/pesqpar.h +297 -0
  63. neverlib/metrics/snr.py +5 -1
  64. neverlib/metrics/spec.py +31 -21
  65. neverlib/metrics/test_pesq.py +0 -4
  66. neverlib/tests/test_imports.py +17 -0
  67. neverlib/utils/__init__.py +26 -15
  68. neverlib/utils/audio_split.py +5 -1
  69. neverlib/utils/checkGPU.py +17 -9
  70. neverlib/utils/lazy_expose.py +29 -0
  71. neverlib/utils/utils.py +40 -12
  72. neverlib/vad/__init__.py +33 -25
  73. neverlib/vad/class_get_speech.py +1 -1
  74. neverlib/vad/class_vad.py +3 -3
  75. neverlib/vad/img.png +0 -0
  76. {neverlib-0.2.6.dist-info → neverlib-0.2.7.dist-info}/METADATA +1 -1
  77. {neverlib-0.2.6.dist-info → neverlib-0.2.7.dist-info}/RECORD +80 -37
  78. {neverlib-0.2.6.dist-info → neverlib-0.2.7.dist-info}/WHEEL +0 -0
  79. {neverlib-0.2.6.dist-info → neverlib-0.2.7.dist-info}/licenses/LICENSE +0 -0
  80. {neverlib-0.2.6.dist-info → neverlib-0.2.7.dist-info}/top_level.txt +0 -0
@@ -4,25 +4,20 @@ Dataset Analyzer Module
4
4
 
5
5
  提供音频数据集批量分析和报告生成功能
6
6
  """
7
-
8
- import numpy as np
9
- import librosa
10
7
  import os
11
8
  import json
9
+ import librosa
10
+ import numpy as np
12
11
  from pathlib import Path
13
- from typing import List, Dict, Tuple, Optional, Union, Any
12
+ from typing import List, Dict, Optional, Union, Any
14
13
  from dataclasses import dataclass, asdict
15
14
  from concurrent.futures import ThreadPoolExecutor, as_completed
16
15
  import multiprocessing
17
16
  from tqdm import tqdm
18
- import warnings
19
-
20
- from .statistics import AudioStatistics
21
17
  from .quality_metrics import QualityAnalyzer, audio_health_check
22
- from .spectral_analysis import SpectralAnalyzer, compute_spectral_features
23
- from .temporal_features import TemporalAnalyzer, compute_temporal_features
24
- from .visualization import AudioVisualizer
25
- from .utils import rms_amplitude, dB
18
+ from .spectral_analysis import SpectralAnalyzer
19
+ from .temporal_features import TemporalAnalyzer
20
+ from ..utils import rms_amplitude, dB
26
21
 
27
22
 
28
23
  @dataclass
@@ -35,26 +30,26 @@ class AudioFileInfo:
35
30
  channels: int
36
31
  bit_depth: Optional[int]
37
32
  format: str
38
-
33
+
39
34
  # 基础统计
40
35
  max_amplitude: float
41
36
  rms_amplitude: float
42
37
  mean_amplitude: float
43
38
  std_amplitude: float
44
-
39
+
45
40
  # 质量指标
46
41
  dynamic_range: float
47
42
  snr_estimate: Optional[float]
48
43
  has_clipping: bool
49
44
  is_silent: bool
50
45
  dc_offset: float
51
-
46
+
52
47
  # 特征摘要
53
48
  spectral_centroid_mean: float
54
49
  spectral_rolloff_mean: float
55
50
  zero_crossing_rate_mean: float
56
51
  tempo: Optional[float]
57
-
52
+
58
53
  # 健康状态
59
54
  health_score: float # 0-100
60
55
  issues: List[str]
@@ -63,52 +58,52 @@ class AudioFileInfo:
63
58
 
64
59
  class DatasetAnalyzer:
65
60
  """数据集分析器类"""
66
-
61
+
67
62
  def __init__(self, sr: int = 22050, n_jobs: int = None):
68
63
  """
69
64
  初始化数据集分析器
70
-
65
+
71
66
  Args:
72
67
  sr: 目标采样率
73
68
  n_jobs: 并行作业数量, None表示使用CPU核心数
74
69
  """
75
70
  self.sr = sr
76
71
  self.n_jobs = n_jobs or min(multiprocessing.cpu_count(), 8)
77
-
72
+
78
73
  # 初始化分析器
79
74
  self.quality_analyzer = QualityAnalyzer(sr=sr)
80
75
  self.spectral_analyzer = SpectralAnalyzer(sr=sr)
81
76
  self.temporal_analyzer = TemporalAnalyzer(sr=sr)
82
-
77
+
83
78
  # 分析结果
84
79
  self.file_infos: List[AudioFileInfo] = []
85
80
  self.dataset_summary: Dict = {}
86
81
  self.analysis_complete = False
87
-
82
+
88
83
  def analyze_single_file(self, file_path: str) -> Optional[AudioFileInfo]:
89
84
  """
90
85
  分析单个音频文件
91
-
86
+
92
87
  Args:
93
88
  file_path: 音频文件路径
94
-
89
+
95
90
  Returns:
96
91
  音频文件信息对象
97
92
  """
98
93
  try:
99
94
  # 加载音频
100
95
  audio, original_sr = librosa.load(file_path, sr=None)
101
-
96
+
102
97
  # 如果需要重采样
103
98
  if self.sr != original_sr:
104
99
  audio_resampled = librosa.resample(audio, orig_sr=original_sr, target_sr=self.sr)
105
100
  else:
106
101
  audio_resampled = audio
107
-
102
+
108
103
  # 获取文件基本信息
109
104
  file_size = os.path.getsize(file_path)
110
105
  duration = len(audio) / original_sr
111
-
106
+
112
107
  # 检测音频格式信息
113
108
  try:
114
109
  import soundfile as sf
@@ -120,21 +115,21 @@ class DatasetAnalyzer:
120
115
  channels = 1 if len(audio.shape) == 1 else audio.shape[1]
121
116
  bit_depth = None
122
117
  format_info = Path(file_path).suffix.lower()
123
-
118
+
124
119
  # 基础统计
125
120
  max_amplitude = float(np.max(np.abs(audio_resampled)))
126
121
  rms_amp = float(rms_amplitude(audio_resampled))
127
122
  mean_amplitude = float(np.mean(np.abs(audio_resampled)))
128
123
  std_amplitude = float(np.std(audio_resampled))
129
-
124
+
130
125
  # 质量分析
131
126
  dynamic_range = self.quality_analyzer.dynamic_range(audio_resampled)
132
127
  dc_offset = float(np.mean(audio_resampled))
133
-
128
+
134
129
  # 检测问题
135
130
  has_clipping = max_amplitude >= 0.99
136
131
  is_silent = mean_amplitude < 1e-6
137
-
132
+
138
133
  # SNR估计(基于信号强度和噪声层)
139
134
  snr_estimate = None
140
135
  try:
@@ -150,7 +145,7 @@ class DatasetAnalyzer:
150
145
  snr_estimate = 20 * np.log10(rms_amp / noise_rms)
151
146
  except:
152
147
  pass
153
-
148
+
154
149
  # 频域特征
155
150
  try:
156
151
  spectral_centroid = self.spectral_analyzer.spectral_centroid(audio_resampled)
@@ -160,29 +155,29 @@ class DatasetAnalyzer:
160
155
  except:
161
156
  spectral_centroid_mean = 0.0
162
157
  spectral_rolloff_mean = 0.0
163
-
158
+
164
159
  # 时域特征
165
160
  try:
166
161
  zcr = self.temporal_analyzer.zero_crossing_rate(audio_resampled)
167
162
  zcr_mean = float(np.mean(zcr))
168
-
163
+
169
164
  # 节拍检测
170
165
  tempo, _ = self.temporal_analyzer.tempo_estimation(audio_resampled)
171
166
  tempo = float(tempo) if tempo > 0 else None
172
167
  except:
173
168
  zcr_mean = 0.0
174
169
  tempo = None
175
-
170
+
176
171
  # 健康检查
177
172
  health_check = audio_health_check(audio_resampled, self.sr)
178
173
  issues = health_check['issues']
179
174
  warnings_list = health_check['warnings']
180
-
175
+
181
176
  # 计算健康分数 (0-100)
182
177
  health_score = 100.0
183
178
  health_score -= len(issues) * 20 # 每个严重问题扣20分
184
179
  health_score -= len(warnings_list) * 5 # 每个警告扣5分
185
-
180
+
186
181
  if has_clipping:
187
182
  health_score -= 15
188
183
  if is_silent:
@@ -191,9 +186,9 @@ class DatasetAnalyzer:
191
186
  health_score -= 10
192
187
  if dynamic_range < 6:
193
188
  health_score -= 10
194
-
189
+
195
190
  health_score = max(0.0, min(100.0, health_score))
196
-
191
+
197
192
  # 创建文件信息对象
198
193
  file_info = AudioFileInfo(
199
194
  file_path=file_path,
@@ -203,109 +198,109 @@ class DatasetAnalyzer:
203
198
  channels=channels,
204
199
  bit_depth=bit_depth,
205
200
  format=format_info,
206
-
201
+
207
202
  max_amplitude=max_amplitude,
208
203
  rms_amplitude=rms_amp,
209
204
  mean_amplitude=mean_amplitude,
210
205
  std_amplitude=std_amplitude,
211
-
206
+
212
207
  dynamic_range=dynamic_range,
213
208
  snr_estimate=snr_estimate,
214
209
  has_clipping=has_clipping,
215
210
  is_silent=is_silent,
216
211
  dc_offset=dc_offset,
217
-
212
+
218
213
  spectral_centroid_mean=spectral_centroid_mean,
219
214
  spectral_rolloff_mean=spectral_rolloff_mean,
220
215
  zero_crossing_rate_mean=zcr_mean,
221
216
  tempo=tempo,
222
-
217
+
223
218
  health_score=health_score,
224
219
  issues=issues,
225
220
  warnings=warnings_list
226
221
  )
227
-
222
+
228
223
  return file_info
229
-
224
+
230
225
  except Exception as e:
231
226
  print(f"Error analyzing {file_path}: {str(e)}")
232
227
  return None
233
-
228
+
234
229
  def analyze_dataset(self, file_paths: List[str], show_progress: bool = True) -> Dict[str, Any]:
235
230
  """
236
231
  批量分析数据集
237
-
232
+
238
233
  Args:
239
234
  file_paths: 音频文件路径列表
240
235
  show_progress: 是否显示进度条
241
-
236
+
242
237
  Returns:
243
238
  分析结果摘要
244
239
  """
245
240
  self.file_infos = []
246
-
241
+
247
242
  # 并行处理文件
248
243
  with ThreadPoolExecutor(max_workers=self.n_jobs) as executor:
249
244
  # 提交所有任务
250
245
  future_to_path = {
251
- executor.submit(self.analyze_single_file, path): path
246
+ executor.submit(self.analyze_single_file, path): path
252
247
  for path in file_paths
253
248
  }
254
-
249
+
255
250
  # 收集结果
256
251
  if show_progress:
257
- futures = tqdm(as_completed(future_to_path), total=len(file_paths),
258
- desc="分析音频文件")
252
+ futures = tqdm(as_completed(future_to_path), total=len(file_paths),
253
+ desc="分析音频文件")
259
254
  else:
260
255
  futures = as_completed(future_to_path)
261
-
256
+
262
257
  for future in futures:
263
258
  result = future.result()
264
259
  if result is not None:
265
260
  self.file_infos.append(result)
266
-
261
+
267
262
  # 生成数据集摘要
268
263
  self.dataset_summary = self._generate_dataset_summary()
269
264
  self.analysis_complete = True
270
-
265
+
271
266
  return self.dataset_summary
272
-
267
+
273
268
  def _generate_dataset_summary(self) -> Dict[str, Any]:
274
269
  """
275
270
  生成数据集摘要统计
276
-
271
+
277
272
  Returns:
278
273
  数据集摘要字典
279
274
  """
280
275
  if not self.file_infos:
281
276
  return {}
282
-
277
+
283
278
  # 基础统计
284
279
  total_files = len(self.file_infos)
285
280
  total_duration = sum(info.duration for info in self.file_infos)
286
281
  total_size = sum(info.file_size for info in self.file_infos)
287
-
282
+
288
283
  # 格式统计
289
284
  formats = {}
290
285
  sample_rates = {}
291
286
  channels_count = {}
292
-
287
+
293
288
  for info in self.file_infos:
294
289
  formats[info.format] = formats.get(info.format, 0) + 1
295
290
  sample_rates[info.sample_rate] = sample_rates.get(info.sample_rate, 0) + 1
296
291
  channels_count[info.channels] = channels_count.get(info.channels, 0) + 1
297
-
292
+
298
293
  # 质量统计
299
294
  health_scores = [info.health_score for info in self.file_infos]
300
295
  problematic_files = [info for info in self.file_infos if info.health_score < 80]
301
296
  silent_files = [info for info in self.file_infos if info.is_silent]
302
297
  clipped_files = [info for info in self.file_infos if info.has_clipping]
303
-
298
+
304
299
  # 音频特征统计
305
300
  durations = [info.duration for info in self.file_infos]
306
301
  rms_values = [info.rms_amplitude for info in self.file_infos]
307
302
  dynamic_ranges = [info.dynamic_range for info in self.file_infos]
308
-
303
+
309
304
  # 生成摘要
310
305
  summary = {
311
306
  'overview': {
@@ -315,13 +310,13 @@ class DatasetAnalyzer:
315
310
  'average_file_duration': np.mean(durations),
316
311
  'analysis_target_sr': self.sr
317
312
  },
318
-
313
+
319
314
  'format_distribution': {
320
315
  'formats': formats,
321
316
  'sample_rates': sample_rates,
322
317
  'channels': channels_count
323
318
  },
324
-
319
+
325
320
  'duration_statistics': {
326
321
  'mean': np.mean(durations),
327
322
  'median': np.median(durations),
@@ -335,7 +330,7 @@ class DatasetAnalyzer:
335
330
  '95th': np.percentile(durations, 95)
336
331
  }
337
332
  },
338
-
333
+
339
334
  'quality_assessment': {
340
335
  'average_health_score': np.mean(health_scores),
341
336
  'problematic_files_count': len(problematic_files),
@@ -350,7 +345,7 @@ class DatasetAnalyzer:
350
345
  'bad (<60)': len([s for s in health_scores if s < 60])
351
346
  }
352
347
  },
353
-
348
+
354
349
  'audio_characteristics': {
355
350
  'rms_statistics': {
356
351
  'mean_linear': np.mean(rms_values),
@@ -367,91 +362,91 @@ class DatasetAnalyzer:
367
362
  'max': np.max(dynamic_ranges)
368
363
  }
369
364
  },
370
-
365
+
371
366
  'recommendations': self._generate_recommendations()
372
367
  }
373
-
368
+
374
369
  return summary
375
-
370
+
376
371
  def _generate_recommendations(self) -> List[str]:
377
372
  """
378
373
  基于分析结果生成改进建议
379
-
374
+
380
375
  Returns:
381
376
  建议列表
382
377
  """
383
378
  recommendations = []
384
-
379
+
385
380
  if not self.file_infos:
386
381
  return recommendations
387
-
382
+
388
383
  # 检查质量问题
389
384
  problematic_count = len([info for info in self.file_infos if info.health_score < 80])
390
385
  if problematic_count > 0:
391
386
  recommendations.append(f"发现 {problematic_count} 个文件存在质量问题, 建议进行质量检查和修复")
392
-
387
+
393
388
  # 检查削波
394
389
  clipped_count = len([info for info in self.file_infos if info.has_clipping])
395
390
  if clipped_count > 0:
396
391
  recommendations.append(f"发现 {clipped_count} 个文件存在削波, 建议重新录制或降低增益")
397
-
392
+
398
393
  # 检查静音文件
399
394
  silent_count = len([info for info in self.file_infos if info.is_silent])
400
395
  if silent_count > 0:
401
396
  recommendations.append(f"发现 {silent_count} 个静音文件, 建议移除或重新录制")
402
-
397
+
403
398
  # 检查采样率一致性
404
399
  sample_rates = set(info.sample_rate for info in self.file_infos)
405
400
  if len(sample_rates) > 1:
406
401
  recommendations.append(f"数据集包含多种采样率 {sample_rates}, 建议统一采样率")
407
-
402
+
408
403
  # 检查动态范围
409
404
  low_dr_count = len([info for info in self.file_infos if info.dynamic_range < 20])
410
405
  if low_dr_count > len(self.file_infos) * 0.2: # 超过20%的文件动态范围过低
411
406
  recommendations.append("大量文件动态范围过低, 可能影响音频质量")
412
-
407
+
413
408
  # 检查时长分布
414
409
  durations = [info.duration for info in self.file_infos]
415
410
  duration_std = np.std(durations)
416
411
  duration_mean = np.mean(durations)
417
412
  if duration_std / duration_mean > 0.5: # 变异系数大于0.5
418
413
  recommendations.append("文件时长分布不均匀, 可能影响训练效果")
419
-
414
+
420
415
  return recommendations
421
-
416
+
422
417
  def get_problematic_files(self, min_health_score: float = 80) -> List[AudioFileInfo]:
423
418
  """
424
419
  获取有问题的文件列表
425
-
420
+
426
421
  Args:
427
422
  min_health_score: 最低健康分数阈值
428
-
423
+
429
424
  Returns:
430
425
  问题文件列表
431
426
  """
432
427
  return [info for info in self.file_infos if info.health_score < min_health_score]
433
-
428
+
434
429
  def export_results(self, output_dir: str):
435
430
  """
436
431
  导出分析结果
437
-
432
+
438
433
  Args:
439
434
  output_dir: 输出目录
440
435
  """
441
436
  output_path = Path(output_dir)
442
437
  output_path.mkdir(parents=True, exist_ok=True)
443
-
438
+
444
439
  # 导出摘要
445
440
  summary_path = output_path / 'dataset_summary.json'
446
441
  with open(summary_path, 'w', encoding='utf-8') as f:
447
442
  json.dump(self.dataset_summary, f, indent=2, ensure_ascii=False, default=str)
448
-
443
+
449
444
  # 导出详细文件信息
450
445
  details_path = output_path / 'file_details.json'
451
446
  file_details = [asdict(info) for info in self.file_infos]
452
447
  with open(details_path, 'w', encoding='utf-8') as f:
453
448
  json.dump(file_details, f, indent=2, ensure_ascii=False, default=str)
454
-
449
+
455
450
  # 导出问题文件列表
456
451
  problematic_files = self.get_problematic_files()
457
452
  if problematic_files:
@@ -459,35 +454,35 @@ class DatasetAnalyzer:
459
454
  problems_data = [asdict(info) for info in problematic_files]
460
455
  with open(problems_path, 'w', encoding='utf-8') as f:
461
456
  json.dump(problems_data, f, indent=2, ensure_ascii=False, default=str)
462
-
457
+
463
458
  print(f"分析结果已导出到: {output_path}")
464
-
459
+
465
460
  def create_analysis_report(self, output_path: str):
466
461
  """
467
462
  创建HTML分析报告
468
-
463
+
469
464
  Args:
470
465
  output_path: 输出HTML文件路径
471
466
  """
472
467
  if not self.analysis_complete:
473
468
  raise ValueError("请先完成数据集分析")
474
-
469
+
475
470
  html_content = self._generate_html_report()
476
-
471
+
477
472
  with open(output_path, 'w', encoding='utf-8') as f:
478
473
  f.write(html_content)
479
-
474
+
480
475
  print(f"HTML报告已生成: {output_path}")
481
-
476
+
482
477
  def _generate_html_report(self) -> str:
483
478
  """
484
479
  生成HTML格式的分析报告
485
-
480
+
486
481
  Returns:
487
482
  HTML内容字符串
488
483
  """
489
484
  summary = self.dataset_summary
490
-
485
+
491
486
  html = f"""
492
487
  <!DOCTYPE html>
493
488
  <html lang="zh-CN">
@@ -511,7 +506,7 @@ class DatasetAnalyzer:
511
506
  <h1>音频数据集分析报告</h1>
512
507
  <p>生成时间: {pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S') if 'pd' in globals() else 'N/A'}</p>
513
508
  </div>
514
-
509
+
515
510
  <div class="section">
516
511
  <h2>数据集概览</h2>
517
512
  <div class="metric">文件总数: {summary['overview']['total_files']}</div>
@@ -519,7 +514,7 @@ class DatasetAnalyzer:
519
514
  <div class="metric">总大小: {summary['overview']['total_size_mb']:.2f} MB</div>
520
515
  <div class="metric">平均文件时长: {summary['overview']['average_file_duration']:.2f} 秒</div>
521
516
  </div>
522
-
517
+
523
518
  <div class="section">
524
519
  <h2>质量评估</h2>
525
520
  <div class="metric">平均健康分数: {summary['quality_assessment']['average_health_score']:.1f}/100</div>
@@ -528,63 +523,63 @@ class DatasetAnalyzer:
528
523
  <div class="metric">静音文件: {summary['quality_assessment']['silent_files_count']}</div>
529
524
  <div class="metric">削波文件: {summary['quality_assessment']['clipped_files_count']}</div>
530
525
  </div>
531
-
526
+
532
527
  <div class="section">
533
528
  <h2>改进建议</h2>
534
529
  """
535
-
530
+
536
531
  for rec in summary['recommendations']:
537
532
  html += f'<div class="recommendation">• {rec}</div>'
538
-
533
+
539
534
  html += """
540
535
  </div>
541
536
  </body>
542
537
  </html>
543
538
  """
544
-
539
+
545
540
  return html
546
541
 
547
542
 
548
def analyze_audio_dataset(directory: str, output_dir: str = None,
                          extensions: List[str] = None, sr: int = 22050,
                          n_jobs: int = None) -> Dict[str, Any]:
    """Analyze every audio file found under a directory.

    The directory is searched recursively. A regular file is selected when
    its name ends with one of ``extensions``, compared case-insensitively,
    so ``.WAV`` is picked up as well as ``.wav``. Each file is listed once,
    and the list is sorted so runs over the same tree are reproducible.

    Args:
        directory: Root directory containing the audio files.
        output_dir: Optional output directory. When given, the analyzer's
            exported results and an ``analysis_report.html`` are written there.
        extensions: File-name endings to include. Defaults to
            ``['.wav', '.mp3', '.flac', '.m4a', '.aac']``.
        sr: Target sample rate passed to ``DatasetAnalyzer``.
        n_jobs: Number of parallel jobs passed to ``DatasetAnalyzer``.

    Returns:
        The summary returned by ``DatasetAnalyzer.analyze_dataset``.

    Raises:
        ValueError: If no matching audio file exists under ``directory``.
    """
    if extensions is None:
        extensions = ['.wav', '.mp3', '.flac', '.m4a', '.aac']

    # One recursive walk instead of one glob per extension: this avoids
    # duplicates from overlapping endings and allows a lowercase comparison.
    wanted_endings = tuple(ext.lower() for ext in extensions)
    file_paths = sorted(
        str(candidate)
        for candidate in Path(directory).rglob('*')
        # Skip directories that merely happen to be named like audio files.
        if candidate.is_file() and candidate.name.lower().endswith(wanted_endings)
    )

    if not file_paths:
        raise ValueError(f"在目录 {directory} 中未找到音频文件")

    # Run the analysis.
    analyzer = DatasetAnalyzer(sr=sr, n_jobs=n_jobs)
    results = analyzer.analyze_dataset(file_paths)

    # Export the results and the HTML report only when a target is given.
    if output_dir:
        analyzer.export_results(output_dir)

        html_path = Path(output_dir) / 'analysis_report.html'
        analyzer.create_analysis_report(str(html_path))

    return results