neverlib 0.2.1-py3-none-any.whl → 0.2.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. neverlib/__init__.py +2 -2
  2. neverlib/audio_aug/__init__.py +1 -1
  3. neverlib/audio_aug/audio_aug.py +4 -5
  4. neverlib/dataAnalyze/README.md +234 -0
  5. neverlib/dataAnalyze/__init__.py +87 -0
  6. neverlib/dataAnalyze/dataset_analyzer.py +590 -0
  7. neverlib/dataAnalyze/quality_metrics.py +364 -0
  8. neverlib/dataAnalyze/rms_distrubution.py +62 -0
  9. neverlib/dataAnalyze/spectral_analysis.py +218 -0
  10. neverlib/dataAnalyze/statistics.py +406 -0
  11. neverlib/dataAnalyze/temporal_features.py +126 -0
  12. neverlib/dataAnalyze/visualization.py +468 -0
  13. neverlib/filter/AudoEQ/README.md +165 -0
  14. neverlib/filter/AudoEQ/auto_eq_de.py +361 -0
  15. neverlib/filter/AudoEQ/auto_eq_ga_advanced.py +577 -0
  16. neverlib/filter/AudoEQ/auto_eq_ga_basic.py +380 -0
  17. neverlib/filter/AudoEQ/auto_eq_spectral_direct.py +75 -0
  18. neverlib/filter/README.md +101 -0
  19. neverlib/filter/__init__.py +7 -0
  20. neverlib/filter/biquad.py +45 -0
  21. neverlib/filter/common.py +5 -6
  22. neverlib/filter/core.py +339 -0
  23. neverlib/metrics/dnsmos.py +160 -0
  24. neverlib/metrics/snr.py +177 -0
  25. neverlib/metrics/spec.py +45 -0
  26. neverlib/metrics/test_pesq.py +35 -0
  27. neverlib/metrics/time.py +68 -0
  28. neverlib/tests/test_vad.py +21 -0
  29. neverlib/utils/audio_split.py +5 -3
  30. neverlib/utils/message.py +4 -4
  31. neverlib/utils/utils.py +32 -15
  32. neverlib/vad/PreProcess.py +1 -1
  33. neverlib/vad/README.md +10 -10
  34. neverlib/vad/VAD_Energy.py +1 -1
  35. neverlib/vad/VAD_Silero.py +1 -1
  36. neverlib/vad/VAD_WebRTC.py +1 -1
  37. neverlib/vad/VAD_funasr.py +1 -1
  38. neverlib/vad/VAD_statistics.py +3 -3
  39. neverlib/vad/VAD_vadlib.py +2 -2
  40. neverlib/vad/VAD_whisper.py +1 -1
  41. neverlib/vad/__init__.py +1 -1
  42. neverlib/vad/class_get_speech.py +4 -4
  43. neverlib/vad/class_vad.py +1 -1
  44. neverlib/vad/utils.py +47 -5
  45. {neverlib-0.2.1.dist-info → neverlib-0.2.3.dist-info}/METADATA +120 -120
  46. neverlib-0.2.3.dist-info/RECORD +53 -0
  47. {neverlib-0.2.1.dist-info → neverlib-0.2.3.dist-info}/WHEEL +1 -1
  48. neverlib/Documents/vad/VAD_Energy.ipynb +0 -159
  49. neverlib/Documents/vad/VAD_Silero.ipynb +0 -305
  50. neverlib/Documents/vad/VAD_WebRTC.ipynb +0 -183
  51. neverlib/Documents/vad/VAD_funasr.ipynb +0 -179
  52. neverlib/Documents/vad/VAD_ppasr.ipynb +0 -175
  53. neverlib/Documents/vad/VAD_statistics.ipynb +0 -522
  54. neverlib/Documents/vad/VAD_vadlib.ipynb +0 -184
  55. neverlib/Documents/vad/VAD_whisper.ipynb +0 -430
  56. neverlib/utils/waveform_analyzer.py +0 -51
  57. neverlib/wav_data/000_short.wav +0 -0
  58. neverlib-0.2.1.dist-info/RECORD +0 -40
  59. {neverlib-0.2.1.dist-info → neverlib-0.2.3.dist-info}/licenses/LICENSE +0 -0
  60. {neverlib-0.2.1.dist-info → neverlib-0.2.3.dist-info}/top_level.txt +0 -0
neverlib/dataAnalyze/dataset_analyzer.py
@@ -0,0 +1,590 @@
+ """
+ Dataset analysis tool module
+ Dataset Analyzer Module
+
+ Provides batch analysis and report generation for audio datasets
+ """
+
+ import numpy as np
+ import librosa
+ import os
+ import json
+ from pathlib import Path
+ from typing import List, Dict, Tuple, Optional, Union, Any
+ from dataclasses import dataclass, asdict
+ from concurrent.futures import ThreadPoolExecutor, as_completed
+ import multiprocessing
+ from tqdm import tqdm
+ import warnings
+
+ from .statistics import AudioStatistics
+ from .quality_metrics import QualityAnalyzer, audio_health_check
+ from .spectral_analysis import SpectralAnalyzer, compute_spectral_features
+ from .temporal_features import TemporalAnalyzer, compute_temporal_features
+ from .visualization import AudioVisualizer
+ from .utils import rms_amplitude, dB
+
+
+ @dataclass
+ class AudioFileInfo:
+     """Data class holding audio file information"""
+     file_path: str
+     file_size: int  # bytes
+     duration: float  # seconds
+     sample_rate: int
+     channels: int
+     bit_depth: Optional[int]
+     format: str
+
+     # Basic statistics
+     max_amplitude: float
+     rms_amplitude: float
+     mean_amplitude: float
+     std_amplitude: float
+
+     # Quality metrics
+     dynamic_range: float
+     snr_estimate: Optional[float]
+     has_clipping: bool
+     is_silent: bool
+     dc_offset: float
+
+     # Feature summary
+     spectral_centroid_mean: float
+     spectral_rolloff_mean: float
+     zero_crossing_rate_mean: float
+     tempo: Optional[float]
+
+     # Health status
+     health_score: float  # 0-100
+     issues: List[str]
+     warnings: List[str]
+
+
+ class DatasetAnalyzer:
+     """Dataset analyzer class"""
+
+     def __init__(self, sr: int = 22050, n_jobs: int = None):
+         """
+         Initialize the dataset analyzer
+
+         Args:
+             sr: target sample rate
+             n_jobs: number of parallel jobs; None means use the CPU core count
+         """
+         self.sr = sr
+         self.n_jobs = n_jobs or min(multiprocessing.cpu_count(), 8)
+
+         # Initialize the analyzers
+         self.quality_analyzer = QualityAnalyzer(sr=sr)
+         self.spectral_analyzer = SpectralAnalyzer(sr=sr)
+         self.temporal_analyzer = TemporalAnalyzer(sr=sr)
+
+         # Analysis results
+         self.file_infos: List[AudioFileInfo] = []
+         self.dataset_summary: Dict = {}
+         self.analysis_complete = False
+
+     def analyze_single_file(self, file_path: str) -> Optional[AudioFileInfo]:
+         """
+         Analyze a single audio file
+
+         Args:
+             file_path: path to the audio file
+
+         Returns:
+             AudioFileInfo object for the file
+         """
+         try:
+             # Load the audio
+             audio, original_sr = librosa.load(file_path, sr=None)
+
+             # Resample if needed
+             if self.sr != original_sr:
+                 audio_resampled = librosa.resample(audio, orig_sr=original_sr, target_sr=self.sr)
+             else:
+                 audio_resampled = audio
+
+             # Basic file information
+             file_size = os.path.getsize(file_path)
+             duration = len(audio) / original_sr
+
+             # Detect audio format information
+             try:
+                 import soundfile as sf
+                 with sf.SoundFile(file_path) as f:
+                     channels = f.channels
+                     bit_depth = f.subtype_info.bits if hasattr(f.subtype_info, 'bits') else None
+                     format_info = f.format
+             except:
+                 channels = 1 if len(audio.shape) == 1 else audio.shape[1]
+                 bit_depth = None
+                 format_info = Path(file_path).suffix.lower()
+
+             # Basic statistics
+             max_amplitude = float(np.max(np.abs(audio_resampled)))
+             rms_amp = float(rms_amplitude(audio_resampled))
+             mean_amplitude = float(np.mean(np.abs(audio_resampled)))
+             std_amplitude = float(np.std(audio_resampled))
+
+             # Quality analysis
+             dynamic_range = self.quality_analyzer.dynamic_range(audio_resampled)
+             dc_offset = float(np.mean(audio_resampled))
+
+             # Detect problems
+             has_clipping = max_amplitude >= 0.99
+             is_silent = mean_amplitude < 1e-6
+
+             # SNR estimate (based on signal level and noise floor)
+             snr_estimate = None
+             try:
+                 if not is_silent:
+                     # Simple SNR estimate: treat the beginning and end segments as noise
+                     noise_duration = min(0.5, duration * 0.1)  # take the smaller value
+                     noise_samples = int(noise_duration * self.sr)
+                     if noise_samples > 0:
+                         noise_start = audio_resampled[:noise_samples]
+                         noise_end = audio_resampled[-noise_samples:]
+                         noise_rms = np.sqrt(np.mean(np.concatenate([noise_start, noise_end]) ** 2))
+                         if noise_rms > 0:
+                             snr_estimate = 20 * np.log10(rms_amp / noise_rms)
+             except:
+                 pass
+
+             # Spectral features
+             try:
+                 spectral_centroid = self.spectral_analyzer.spectral_centroid(audio_resampled)
+                 spectral_rolloff = self.spectral_analyzer.spectral_rolloff(audio_resampled)
+                 spectral_centroid_mean = float(np.mean(spectral_centroid))
+                 spectral_rolloff_mean = float(np.mean(spectral_rolloff))
+             except:
+                 spectral_centroid_mean = 0.0
+                 spectral_rolloff_mean = 0.0
+
+             # Temporal features
+             try:
+                 zcr = self.temporal_analyzer.zero_crossing_rate(audio_resampled)
+                 zcr_mean = float(np.mean(zcr))
+
+                 # Tempo detection
+                 tempo, _ = self.temporal_analyzer.tempo_estimation(audio_resampled)
+                 tempo = float(tempo) if tempo > 0 else None
+             except:
+                 zcr_mean = 0.0
+                 tempo = None
+
+             # Health check
+             health_check = audio_health_check(audio_resampled, self.sr)
+             issues = health_check['issues']
+             warnings_list = health_check['warnings']
+
+             # Compute the health score (0-100)
+             health_score = 100.0
+             health_score -= len(issues) * 20  # deduct 20 points per serious issue
+             health_score -= len(warnings_list) * 5  # deduct 5 points per warning
+
+             if has_clipping:
+                 health_score -= 15
+             if is_silent:
+                 health_score -= 30
+             if abs(dc_offset) > 0.01:
+                 health_score -= 10
+             if dynamic_range < 6:
+                 health_score -= 10
+
+             health_score = max(0.0, min(100.0, health_score))
+
+             # Create the file info object
+             file_info = AudioFileInfo(
+                 file_path=file_path,
+                 file_size=file_size,
+                 duration=duration,
+                 sample_rate=original_sr,
+                 channels=channels,
+                 bit_depth=bit_depth,
+                 format=format_info,
+
+                 max_amplitude=max_amplitude,
+                 rms_amplitude=rms_amp,
+                 mean_amplitude=mean_amplitude,
+                 std_amplitude=std_amplitude,
+
+                 dynamic_range=dynamic_range,
+                 snr_estimate=snr_estimate,
+                 has_clipping=has_clipping,
+                 is_silent=is_silent,
+                 dc_offset=dc_offset,
+
+                 spectral_centroid_mean=spectral_centroid_mean,
+                 spectral_rolloff_mean=spectral_rolloff_mean,
+                 zero_crossing_rate_mean=zcr_mean,
+                 tempo=tempo,
+
+                 health_score=health_score,
+                 issues=issues,
+                 warnings=warnings_list
+             )
+
+             return file_info
+
+         except Exception as e:
+             print(f"Error analyzing {file_path}: {str(e)}")
+             return None
+
+     def analyze_dataset(self, file_paths: List[str], show_progress: bool = True) -> Dict[str, Any]:
+         """
+         Analyze a dataset in batch
+
+         Args:
+             file_paths: list of audio file paths
+             show_progress: whether to show a progress bar
+
+         Returns:
+             Analysis summary
+         """
+         self.file_infos = []
+
+         # Process files in parallel
+         with ThreadPoolExecutor(max_workers=self.n_jobs) as executor:
+             # Submit all tasks
+             future_to_path = {
+                 executor.submit(self.analyze_single_file, path): path
+                 for path in file_paths
+             }
+
+             # Collect results
+             if show_progress:
+                 futures = tqdm(as_completed(future_to_path), total=len(file_paths),
+                                desc="Analyzing audio files")
+             else:
+                 futures = as_completed(future_to_path)
+
+             for future in futures:
+                 result = future.result()
+                 if result is not None:
+                     self.file_infos.append(result)
+
+         # Generate the dataset summary
+         self.dataset_summary = self._generate_dataset_summary()
+         self.analysis_complete = True
+
+         return self.dataset_summary
+
+     def _generate_dataset_summary(self) -> Dict[str, Any]:
+         """
+         Generate dataset summary statistics
+
+         Returns:
+             Dataset summary dictionary
+         """
+         if not self.file_infos:
+             return {}
+
+         # Basic statistics
+         total_files = len(self.file_infos)
+         total_duration = sum(info.duration for info in self.file_infos)
+         total_size = sum(info.file_size for info in self.file_infos)
+
+         # Format statistics
+         formats = {}
+         sample_rates = {}
+         channels_count = {}
+
+         for info in self.file_infos:
+             formats[info.format] = formats.get(info.format, 0) + 1
+             sample_rates[info.sample_rate] = sample_rates.get(info.sample_rate, 0) + 1
+             channels_count[info.channels] = channels_count.get(info.channels, 0) + 1
+
+         # Quality statistics
+         health_scores = [info.health_score for info in self.file_infos]
+         problematic_files = [info for info in self.file_infos if info.health_score < 80]
+         silent_files = [info for info in self.file_infos if info.is_silent]
+         clipped_files = [info for info in self.file_infos if info.has_clipping]
+
+         # Audio feature statistics
+         durations = [info.duration for info in self.file_infos]
+         rms_values = [info.rms_amplitude for info in self.file_infos]
+         dynamic_ranges = [info.dynamic_range for info in self.file_infos]
+
+         # Build the summary
+         summary = {
+             'overview': {
+                 'total_files': total_files,
+                 'total_duration_hours': total_duration / 3600,
+                 'total_size_mb': total_size / (1024 * 1024),
+                 'average_file_duration': np.mean(durations),
+                 'analysis_target_sr': self.sr
+             },
+
+             'format_distribution': {
+                 'formats': formats,
+                 'sample_rates': sample_rates,
+                 'channels': channels_count
+             },
+
+             'duration_statistics': {
+                 'mean': np.mean(durations),
+                 'median': np.median(durations),
+                 'std': np.std(durations),
+                 'min': np.min(durations),
+                 'max': np.max(durations),
+                 'percentiles': {
+                     '25th': np.percentile(durations, 25),
+                     '75th': np.percentile(durations, 75),
+                     '90th': np.percentile(durations, 90),
+                     '95th': np.percentile(durations, 95)
+                 }
+             },
+
+             'quality_assessment': {
+                 'average_health_score': np.mean(health_scores),
+                 'problematic_files_count': len(problematic_files),
+                 'problematic_files_percentage': len(problematic_files) / total_files * 100,
+                 'silent_files_count': len(silent_files),
+                 'clipped_files_count': len(clipped_files),
+                 'quality_distribution': {
+                     'excellent (90-100)': len([s for s in health_scores if s >= 90]),
+                     'good (80-89)': len([s for s in health_scores if 80 <= s < 90]),
+                     'fair (70-79)': len([s for s in health_scores if 70 <= s < 80]),
+                     'poor (60-69)': len([s for s in health_scores if 60 <= s < 70]),
+                     'bad (<60)': len([s for s in health_scores if s < 60])
+                 }
+             },
+
+             'audio_characteristics': {
+                 'rms_statistics': {
+                     'mean_linear': np.mean(rms_values),
+                     'mean_db': dB(np.mean(rms_values)),
+                     'std_linear': np.std(rms_values),
+                     'min_db': dB(np.min(rms_values)) if np.min(rms_values) > 0 else -float('inf'),
+                     'max_db': dB(np.max(rms_values))
+                 },
+                 'dynamic_range_statistics': {
+                     'mean': np.mean(dynamic_ranges),
+                     'median': np.median(dynamic_ranges),
+                     'std': np.std(dynamic_ranges),
+                     'min': np.min(dynamic_ranges),
+                     'max': np.max(dynamic_ranges)
+                 }
+             },
+
+             'recommendations': self._generate_recommendations()
+         }
+
+         return summary
+
+     def _generate_recommendations(self) -> List[str]:
+         """
+         Generate improvement recommendations based on the analysis results
+
+         Returns:
+             List of recommendations
+         """
+         recommendations = []
+
+         if not self.file_infos:
+             return recommendations
+
+         # Check for quality issues
+         problematic_count = len([info for info in self.file_infos if info.health_score < 80])
+         if problematic_count > 0:
+             recommendations.append(f"Found {problematic_count} files with quality issues; consider inspecting and repairing them")
+
+         # Check for clipping
+         clipped_count = len([info for info in self.file_infos if info.has_clipping])
+         if clipped_count > 0:
+             recommendations.append(f"Found {clipped_count} clipped files; consider re-recording them or lowering the gain")
+
+         # Check for silent files
+         silent_count = len([info for info in self.file_infos if info.is_silent])
+         if silent_count > 0:
+             recommendations.append(f"Found {silent_count} silent files; consider removing or re-recording them")
+
+         # Check sample rate consistency
+         sample_rates = set(info.sample_rate for info in self.file_infos)
+         if len(sample_rates) > 1:
+             recommendations.append(f"The dataset contains multiple sample rates {sample_rates}; consider unifying the sample rate")
+
+         # Check dynamic range
+         low_dr_count = len([info for info in self.file_infos if info.dynamic_range < 20])
+         if low_dr_count > len(self.file_infos) * 0.2:  # more than 20% of files have a low dynamic range
+             recommendations.append("Many files have a low dynamic range, which may affect audio quality")
+
+         # Check the duration distribution
+         durations = [info.duration for info in self.file_infos]
+         duration_std = np.std(durations)
+         duration_mean = np.mean(durations)
+         if duration_std / duration_mean > 0.5:  # coefficient of variation greater than 0.5
+             recommendations.append("File durations are unevenly distributed, which may affect training")
+
+         return recommendations
+
+     def get_problematic_files(self, min_health_score: float = 80) -> List[AudioFileInfo]:
+         """
+         Get the list of problematic files
+
+         Args:
+             min_health_score: minimum health score threshold
+
+         Returns:
+             List of problematic files
+         """
+         return [info for info in self.file_infos if info.health_score < min_health_score]
+
+     def export_results(self, output_dir: str):
+         """
+         Export the analysis results
+
+         Args:
+             output_dir: output directory
+         """
+         output_path = Path(output_dir)
+         output_path.mkdir(parents=True, exist_ok=True)
+
+         # Export the summary
+         summary_path = output_path / 'dataset_summary.json'
+         with open(summary_path, 'w', encoding='utf-8') as f:
+             json.dump(self.dataset_summary, f, indent=2, ensure_ascii=False, default=str)
+
+         # Export detailed file information
+         details_path = output_path / 'file_details.json'
+         file_details = [asdict(info) for info in self.file_infos]
+         with open(details_path, 'w', encoding='utf-8') as f:
+             json.dump(file_details, f, indent=2, ensure_ascii=False, default=str)
+
+         # Export the list of problematic files
+         problematic_files = self.get_problematic_files()
+         if problematic_files:
+             problems_path = output_path / 'problematic_files.json'
+             problems_data = [asdict(info) for info in problematic_files]
+             with open(problems_path, 'w', encoding='utf-8') as f:
+                 json.dump(problems_data, f, indent=2, ensure_ascii=False, default=str)
+
+         print(f"Analysis results exported to: {output_path}")
+
+     def create_analysis_report(self, output_path: str):
+         """
+         Create an HTML analysis report
+
+         Args:
+             output_path: output HTML file path
+         """
+         if not self.analysis_complete:
+             raise ValueError("Run the dataset analysis first")
+
+         html_content = self._generate_html_report()
+
+         with open(output_path, 'w', encoding='utf-8') as f:
+             f.write(html_content)
+
+         print(f"HTML report generated: {output_path}")
+
+     def _generate_html_report(self) -> str:
+         """
+         Generate the analysis report as HTML
+
+         Returns:
+             HTML content string
+         """
+         summary = self.dataset_summary
+
+         html = f"""
+         <!DOCTYPE html>
+         <html lang="en">
+         <head>
+             <meta charset="UTF-8">
+             <meta name="viewport" content="width=device-width, initial-scale=1.0">
+             <title>Audio Dataset Analysis Report</title>
+             <style>
+                 body {{ font-family: Arial, sans-serif; margin: 20px; }}
+                 .header {{ background-color: #f0f0f0; padding: 20px; text-align: center; }}
+                 .section {{ margin: 20px 0; padding: 15px; border: 1px solid #ddd; }}
+                 .metric {{ display: inline-block; margin: 10px; padding: 10px; background-color: #f9f9f9; }}
+                 .recommendation {{ background-color: #fff3cd; padding: 10px; margin: 5px 0; }}
+                 table {{ border-collapse: collapse; width: 100%; }}
+                 th, td {{ border: 1px solid #ddd; padding: 8px; text-align: left; }}
+                 th {{ background-color: #f2f2f2; }}
+             </style>
+         </head>
+         <body>
+             <div class="header">
+                 <h1>Audio Dataset Analysis Report</h1>
+                 <p>Generated at: {pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S') if 'pd' in globals() else 'N/A'}</p>
+             </div>
+
+             <div class="section">
+                 <h2>Dataset Overview</h2>
+                 <div class="metric">Total files: {summary['overview']['total_files']}</div>
+                 <div class="metric">Total duration: {summary['overview']['total_duration_hours']:.2f} hours</div>
+                 <div class="metric">Total size: {summary['overview']['total_size_mb']:.2f} MB</div>
+                 <div class="metric">Average file duration: {summary['overview']['average_file_duration']:.2f} seconds</div>
+             </div>
+
+             <div class="section">
+                 <h2>Quality Assessment</h2>
+                 <div class="metric">Average health score: {summary['quality_assessment']['average_health_score']:.1f}/100</div>
+                 <div class="metric">Problematic files: {summary['quality_assessment']['problematic_files_count']}</div>
+                 <div class="metric">Problematic file percentage: {summary['quality_assessment']['problematic_files_percentage']:.1f}%</div>
+                 <div class="metric">Silent files: {summary['quality_assessment']['silent_files_count']}</div>
+                 <div class="metric">Clipped files: {summary['quality_assessment']['clipped_files_count']}</div>
+             </div>
+
+             <div class="section">
+                 <h2>Recommendations</h2>
+         """
+
+         for rec in summary['recommendations']:
+             html += f'<div class="recommendation">• {rec}</div>'
+
+         html += """
+             </div>
+         </body>
+         </html>
+         """
+
+         return html
+
+
+ def analyze_audio_dataset(directory: str, output_dir: str = None,
+                           extensions: List[str] = None, sr: int = 22050,
+                           n_jobs: int = None) -> Dict[str, Any]:
+     """
+     Quickly analyze an audio dataset
+
+     Args:
+         directory: directory containing the audio files
+         output_dir: output directory (optional)
+         extensions: supported file extensions
+         sr: target sample rate
+         n_jobs: number of parallel jobs
+
+     Returns:
+         Analysis summary
+     """
+     if extensions is None:
+         extensions = ['.wav', '.mp3', '.flac', '.m4a', '.aac']
+
+     # Collect files
+     directory_path = Path(directory)
+     file_paths = []
+     for ext in extensions:
+         file_paths.extend(list(directory_path.glob(f'**/*{ext}')))
+
+     file_paths = [str(p) for p in file_paths]
+
+     if not file_paths:
+         raise ValueError(f"No audio files found in directory {directory}")
+
+     # Analyze the dataset
+     analyzer = DatasetAnalyzer(sr=sr, n_jobs=n_jobs)
+     results = analyzer.analyze_dataset(file_paths)
+
+     # Export results
+     if output_dir:
+         analyzer.export_results(output_dir)
+
+         # Generate the HTML report
+         html_path = Path(output_dir) / 'analysis_report.html'
+         analyzer.create_analysis_report(str(html_path))
+
+     return results
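For orientation, here is a minimal usage sketch of the new dataAnalyze entry points added in the hunk above. It is not part of the released diff; the corpus and report directories are placeholder paths, and it assumes the module is importable as shown in the file list.

    # Hypothetical usage of the new dataset analyzer; paths below are placeholders.
    from neverlib.dataAnalyze.dataset_analyzer import DatasetAnalyzer, analyze_audio_dataset

    # One-call wrapper: walks the directory, analyzes every supported audio file,
    # and (when output_dir is given) writes JSON summaries plus an HTML report.
    summary = analyze_audio_dataset(directory="corpus/", output_dir="reports/", sr=22050, n_jobs=4)
    print(summary["overview"]["total_files"], summary["quality_assessment"]["average_health_score"])

    # Or drive the analyzer directly for finer-grained control.
    analyzer = DatasetAnalyzer(sr=22050, n_jobs=4)
    analyzer.analyze_dataset(["corpus/a.wav", "corpus/b.wav"])
    for info in analyzer.get_problematic_files(min_health_score=80):
        print(info.file_path, info.health_score, info.issues)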