neverlib 0.2.6__py3-none-any.whl → 0.2.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- neverlib/.claude/settings.local.json +9 -0
- neverlib/Docs/audio_aug/test_volume.ipynb +416 -0
- neverlib/Docs/audio_aug_test/test_volume.ipynb +289 -0
- neverlib/Docs/filter/biquad.ipynb +129 -0
- neverlib/Docs/filter/filter_family.ipynb +450 -0
- neverlib/Docs/filter/highpass.ipynb +139 -0
- neverlib/Docs/filter/scipy_filter_family.ipynb +110 -0
- neverlib/Docs/vad/VAD_Energy.ipynb +167 -0
- neverlib/Docs/vad/VAD_Silero.ipynb +325 -0
- neverlib/Docs/vad/VAD_WebRTC.ipynb +189 -0
- neverlib/Docs/vad/VAD_funasr.ipynb +192 -0
- neverlib/Docs/vad/VAD_rvADfast.ipynb +162 -0
- neverlib/Docs/vad/VAD_statistics.ipynb +532 -0
- neverlib/Docs/vad/VAD_tenVAD.ipynb +292 -0
- neverlib/Docs/vad/VAD_vadlib.ipynb +168 -0
- neverlib/Docs/vad/VAD_whisper.ipynb +404 -0
- neverlib/QA/gen_init.py +117 -0
- neverlib/QA/get_fun.py +19 -0
- neverlib/__init__.py +21 -4
- neverlib/audio_aug/HarmonicDistortion.py +19 -13
- neverlib/audio_aug/__init__.py +30 -12
- neverlib/audio_aug/audio_aug.py +19 -14
- neverlib/audio_aug/clip_aug.py +15 -18
- neverlib/audio_aug/coder_aug.py +44 -24
- neverlib/audio_aug/coder_aug2.py +54 -37
- neverlib/audio_aug/loss_packet_aug.py +7 -7
- neverlib/audio_aug/quant_aug.py +19 -17
- neverlib/data/000_short_enhance.wav +0 -0
- neverlib/data/3956_speech.wav +0 -0
- neverlib/data/3956_sweep.wav +0 -0
- neverlib/data/vad_example.wav +0 -0
- neverlib/data/white.wav +0 -0
- neverlib/data/white_EQ.wav +0 -0
- neverlib/data/white_matched.wav +0 -0
- neverlib/data_analyze/__init__.py +25 -20
- neverlib/data_analyze/dataset_analyzer.py +109 -114
- neverlib/data_analyze/quality_metrics.py +87 -89
- neverlib/data_analyze/rms_distrubution.py +23 -42
- neverlib/data_analyze/spectral_analysis.py +43 -46
- neverlib/data_analyze/statistics.py +76 -76
- neverlib/data_analyze/temporal_features.py +15 -6
- neverlib/data_analyze/visualization.py +208 -144
- neverlib/filter/__init__.py +17 -20
- neverlib/filter/auto_eq/__init__.py +18 -35
- neverlib/filter/auto_eq/de_eq.py +0 -2
- neverlib/filter/common.py +24 -5
- neverlib/metrics/DNSMOS/bak_ovr.onnx +0 -0
- neverlib/metrics/DNSMOS/model_v8.onnx +0 -0
- neverlib/metrics/DNSMOS/sig.onnx +0 -0
- neverlib/metrics/DNSMOS/sig_bak_ovr.onnx +0 -0
- neverlib/metrics/__init__.py +23 -0
- neverlib/metrics/dnsmos.py +4 -15
- neverlib/metrics/pDNSMOS/sig_bak_ovr.onnx +0 -0
- neverlib/metrics/pesq_c/PESQ +0 -0
- neverlib/metrics/pesq_c/dsp.c +553 -0
- neverlib/metrics/pesq_c/dsp.h +138 -0
- neverlib/metrics/pesq_c/pesq.h +294 -0
- neverlib/metrics/pesq_c/pesqdsp.c +1047 -0
- neverlib/metrics/pesq_c/pesqio.c +392 -0
- neverlib/metrics/pesq_c/pesqmain.c +610 -0
- neverlib/metrics/pesq_c/pesqmod.c +1417 -0
- neverlib/metrics/pesq_c/pesqpar.h +297 -0
- neverlib/metrics/snr.py +5 -1
- neverlib/metrics/spec.py +31 -21
- neverlib/metrics/test_pesq.py +0 -4
- neverlib/tests/test_imports.py +17 -0
- neverlib/utils/__init__.py +26 -15
- neverlib/utils/audio_split.py +5 -1
- neverlib/utils/checkGPU.py +17 -9
- neverlib/utils/lazy_expose.py +29 -0
- neverlib/utils/utils.py +40 -12
- neverlib/vad/__init__.py +33 -25
- neverlib/vad/class_get_speech.py +1 -1
- neverlib/vad/class_vad.py +3 -3
- neverlib/vad/img.png +0 -0
- {neverlib-0.2.6.dist-info → neverlib-0.2.7.dist-info}/METADATA +1 -1
- {neverlib-0.2.6.dist-info → neverlib-0.2.7.dist-info}/RECORD +80 -37
- {neverlib-0.2.6.dist-info → neverlib-0.2.7.dist-info}/WHEEL +0 -0
- {neverlib-0.2.6.dist-info → neverlib-0.2.7.dist-info}/licenses/LICENSE +0 -0
- {neverlib-0.2.6.dist-info → neverlib-0.2.7.dist-info}/top_level.txt +0 -0
|
@@ -4,25 +4,20 @@ Dataset Analyzer Module
|
|
|
4
4
|
|
|
5
5
|
提供音频数据集批量分析和报告生成功能
|
|
6
6
|
"""
|
|
7
|
-
|
|
8
|
-
import numpy as np
|
|
9
|
-
import librosa
|
|
10
7
|
import os
|
|
11
8
|
import json
|
|
9
|
+
import librosa
|
|
10
|
+
import numpy as np
|
|
12
11
|
from pathlib import Path
|
|
13
|
-
from typing import List, Dict,
|
|
12
|
+
from typing import List, Dict, Optional, Union, Any
|
|
14
13
|
from dataclasses import dataclass, asdict
|
|
15
14
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
16
15
|
import multiprocessing
|
|
17
16
|
from tqdm import tqdm
|
|
18
|
-
import warnings
|
|
19
|
-
|
|
20
|
-
from .statistics import AudioStatistics
|
|
21
17
|
from .quality_metrics import QualityAnalyzer, audio_health_check
|
|
22
|
-
from .spectral_analysis import SpectralAnalyzer
|
|
23
|
-
from .temporal_features import TemporalAnalyzer
|
|
24
|
-
from
|
|
25
|
-
from .utils import rms_amplitude, dB
|
|
18
|
+
from .spectral_analysis import SpectralAnalyzer
|
|
19
|
+
from .temporal_features import TemporalAnalyzer
|
|
20
|
+
from ..utils import rms_amplitude, dB
|
|
26
21
|
|
|
27
22
|
|
|
28
23
|
@dataclass
|
|
@@ -35,26 +30,26 @@ class AudioFileInfo:
|
|
|
35
30
|
channels: int
|
|
36
31
|
bit_depth: Optional[int]
|
|
37
32
|
format: str
|
|
38
|
-
|
|
33
|
+
|
|
39
34
|
# 基础统计
|
|
40
35
|
max_amplitude: float
|
|
41
36
|
rms_amplitude: float
|
|
42
37
|
mean_amplitude: float
|
|
43
38
|
std_amplitude: float
|
|
44
|
-
|
|
39
|
+
|
|
45
40
|
# 质量指标
|
|
46
41
|
dynamic_range: float
|
|
47
42
|
snr_estimate: Optional[float]
|
|
48
43
|
has_clipping: bool
|
|
49
44
|
is_silent: bool
|
|
50
45
|
dc_offset: float
|
|
51
|
-
|
|
46
|
+
|
|
52
47
|
# 特征摘要
|
|
53
48
|
spectral_centroid_mean: float
|
|
54
49
|
spectral_rolloff_mean: float
|
|
55
50
|
zero_crossing_rate_mean: float
|
|
56
51
|
tempo: Optional[float]
|
|
57
|
-
|
|
52
|
+
|
|
58
53
|
# 健康状态
|
|
59
54
|
health_score: float # 0-100
|
|
60
55
|
issues: List[str]
|
|
@@ -63,52 +58,52 @@ class AudioFileInfo:
|
|
|
63
58
|
|
|
64
59
|
class DatasetAnalyzer:
|
|
65
60
|
"""数据集分析器类"""
|
|
66
|
-
|
|
61
|
+
|
|
67
62
|
def __init__(self, sr: int = 22050, n_jobs: int = None):
|
|
68
63
|
"""
|
|
69
64
|
初始化数据集分析器
|
|
70
|
-
|
|
65
|
+
|
|
71
66
|
Args:
|
|
72
67
|
sr: 目标采样率
|
|
73
68
|
n_jobs: 并行作业数量, None表示使用CPU核心数
|
|
74
69
|
"""
|
|
75
70
|
self.sr = sr
|
|
76
71
|
self.n_jobs = n_jobs or min(multiprocessing.cpu_count(), 8)
|
|
77
|
-
|
|
72
|
+
|
|
78
73
|
# 初始化分析器
|
|
79
74
|
self.quality_analyzer = QualityAnalyzer(sr=sr)
|
|
80
75
|
self.spectral_analyzer = SpectralAnalyzer(sr=sr)
|
|
81
76
|
self.temporal_analyzer = TemporalAnalyzer(sr=sr)
|
|
82
|
-
|
|
77
|
+
|
|
83
78
|
# 分析结果
|
|
84
79
|
self.file_infos: List[AudioFileInfo] = []
|
|
85
80
|
self.dataset_summary: Dict = {}
|
|
86
81
|
self.analysis_complete = False
|
|
87
|
-
|
|
82
|
+
|
|
88
83
|
def analyze_single_file(self, file_path: str) -> Optional[AudioFileInfo]:
|
|
89
84
|
"""
|
|
90
85
|
分析单个音频文件
|
|
91
|
-
|
|
86
|
+
|
|
92
87
|
Args:
|
|
93
88
|
file_path: 音频文件路径
|
|
94
|
-
|
|
89
|
+
|
|
95
90
|
Returns:
|
|
96
91
|
音频文件信息对象
|
|
97
92
|
"""
|
|
98
93
|
try:
|
|
99
94
|
# 加载音频
|
|
100
95
|
audio, original_sr = librosa.load(file_path, sr=None)
|
|
101
|
-
|
|
96
|
+
|
|
102
97
|
# 如果需要重采样
|
|
103
98
|
if self.sr != original_sr:
|
|
104
99
|
audio_resampled = librosa.resample(audio, orig_sr=original_sr, target_sr=self.sr)
|
|
105
100
|
else:
|
|
106
101
|
audio_resampled = audio
|
|
107
|
-
|
|
102
|
+
|
|
108
103
|
# 获取文件基本信息
|
|
109
104
|
file_size = os.path.getsize(file_path)
|
|
110
105
|
duration = len(audio) / original_sr
|
|
111
|
-
|
|
106
|
+
|
|
112
107
|
# 检测音频格式信息
|
|
113
108
|
try:
|
|
114
109
|
import soundfile as sf
|
|
@@ -120,21 +115,21 @@ class DatasetAnalyzer:
|
|
|
120
115
|
channels = 1 if len(audio.shape) == 1 else audio.shape[1]
|
|
121
116
|
bit_depth = None
|
|
122
117
|
format_info = Path(file_path).suffix.lower()
|
|
123
|
-
|
|
118
|
+
|
|
124
119
|
# 基础统计
|
|
125
120
|
max_amplitude = float(np.max(np.abs(audio_resampled)))
|
|
126
121
|
rms_amp = float(rms_amplitude(audio_resampled))
|
|
127
122
|
mean_amplitude = float(np.mean(np.abs(audio_resampled)))
|
|
128
123
|
std_amplitude = float(np.std(audio_resampled))
|
|
129
|
-
|
|
124
|
+
|
|
130
125
|
# 质量分析
|
|
131
126
|
dynamic_range = self.quality_analyzer.dynamic_range(audio_resampled)
|
|
132
127
|
dc_offset = float(np.mean(audio_resampled))
|
|
133
|
-
|
|
128
|
+
|
|
134
129
|
# 检测问题
|
|
135
130
|
has_clipping = max_amplitude >= 0.99
|
|
136
131
|
is_silent = mean_amplitude < 1e-6
|
|
137
|
-
|
|
132
|
+
|
|
138
133
|
# SNR估计(基于信号强度和噪声层)
|
|
139
134
|
snr_estimate = None
|
|
140
135
|
try:
|
|
@@ -150,7 +145,7 @@ class DatasetAnalyzer:
|
|
|
150
145
|
snr_estimate = 20 * np.log10(rms_amp / noise_rms)
|
|
151
146
|
except:
|
|
152
147
|
pass
|
|
153
|
-
|
|
148
|
+
|
|
154
149
|
# 频域特征
|
|
155
150
|
try:
|
|
156
151
|
spectral_centroid = self.spectral_analyzer.spectral_centroid(audio_resampled)
|
|
@@ -160,29 +155,29 @@ class DatasetAnalyzer:
|
|
|
160
155
|
except:
|
|
161
156
|
spectral_centroid_mean = 0.0
|
|
162
157
|
spectral_rolloff_mean = 0.0
|
|
163
|
-
|
|
158
|
+
|
|
164
159
|
# 时域特征
|
|
165
160
|
try:
|
|
166
161
|
zcr = self.temporal_analyzer.zero_crossing_rate(audio_resampled)
|
|
167
162
|
zcr_mean = float(np.mean(zcr))
|
|
168
|
-
|
|
163
|
+
|
|
169
164
|
# 节拍检测
|
|
170
165
|
tempo, _ = self.temporal_analyzer.tempo_estimation(audio_resampled)
|
|
171
166
|
tempo = float(tempo) if tempo > 0 else None
|
|
172
167
|
except:
|
|
173
168
|
zcr_mean = 0.0
|
|
174
169
|
tempo = None
|
|
175
|
-
|
|
170
|
+
|
|
176
171
|
# 健康检查
|
|
177
172
|
health_check = audio_health_check(audio_resampled, self.sr)
|
|
178
173
|
issues = health_check['issues']
|
|
179
174
|
warnings_list = health_check['warnings']
|
|
180
|
-
|
|
175
|
+
|
|
181
176
|
# 计算健康分数 (0-100)
|
|
182
177
|
health_score = 100.0
|
|
183
178
|
health_score -= len(issues) * 20 # 每个严重问题扣20分
|
|
184
179
|
health_score -= len(warnings_list) * 5 # 每个警告扣5分
|
|
185
|
-
|
|
180
|
+
|
|
186
181
|
if has_clipping:
|
|
187
182
|
health_score -= 15
|
|
188
183
|
if is_silent:
|
|
@@ -191,9 +186,9 @@ class DatasetAnalyzer:
|
|
|
191
186
|
health_score -= 10
|
|
192
187
|
if dynamic_range < 6:
|
|
193
188
|
health_score -= 10
|
|
194
|
-
|
|
189
|
+
|
|
195
190
|
health_score = max(0.0, min(100.0, health_score))
|
|
196
|
-
|
|
191
|
+
|
|
197
192
|
# 创建文件信息对象
|
|
198
193
|
file_info = AudioFileInfo(
|
|
199
194
|
file_path=file_path,
|
|
@@ -203,109 +198,109 @@ class DatasetAnalyzer:
|
|
|
203
198
|
channels=channels,
|
|
204
199
|
bit_depth=bit_depth,
|
|
205
200
|
format=format_info,
|
|
206
|
-
|
|
201
|
+
|
|
207
202
|
max_amplitude=max_amplitude,
|
|
208
203
|
rms_amplitude=rms_amp,
|
|
209
204
|
mean_amplitude=mean_amplitude,
|
|
210
205
|
std_amplitude=std_amplitude,
|
|
211
|
-
|
|
206
|
+
|
|
212
207
|
dynamic_range=dynamic_range,
|
|
213
208
|
snr_estimate=snr_estimate,
|
|
214
209
|
has_clipping=has_clipping,
|
|
215
210
|
is_silent=is_silent,
|
|
216
211
|
dc_offset=dc_offset,
|
|
217
|
-
|
|
212
|
+
|
|
218
213
|
spectral_centroid_mean=spectral_centroid_mean,
|
|
219
214
|
spectral_rolloff_mean=spectral_rolloff_mean,
|
|
220
215
|
zero_crossing_rate_mean=zcr_mean,
|
|
221
216
|
tempo=tempo,
|
|
222
|
-
|
|
217
|
+
|
|
223
218
|
health_score=health_score,
|
|
224
219
|
issues=issues,
|
|
225
220
|
warnings=warnings_list
|
|
226
221
|
)
|
|
227
|
-
|
|
222
|
+
|
|
228
223
|
return file_info
|
|
229
|
-
|
|
224
|
+
|
|
230
225
|
except Exception as e:
|
|
231
226
|
print(f"Error analyzing {file_path}: {str(e)}")
|
|
232
227
|
return None
|
|
233
|
-
|
|
228
|
+
|
|
234
229
|
def analyze_dataset(self, file_paths: List[str], show_progress: bool = True) -> Dict[str, Any]:
|
|
235
230
|
"""
|
|
236
231
|
批量分析数据集
|
|
237
|
-
|
|
232
|
+
|
|
238
233
|
Args:
|
|
239
234
|
file_paths: 音频文件路径列表
|
|
240
235
|
show_progress: 是否显示进度条
|
|
241
|
-
|
|
236
|
+
|
|
242
237
|
Returns:
|
|
243
238
|
分析结果摘要
|
|
244
239
|
"""
|
|
245
240
|
self.file_infos = []
|
|
246
|
-
|
|
241
|
+
|
|
247
242
|
# 并行处理文件
|
|
248
243
|
with ThreadPoolExecutor(max_workers=self.n_jobs) as executor:
|
|
249
244
|
# 提交所有任务
|
|
250
245
|
future_to_path = {
|
|
251
|
-
executor.submit(self.analyze_single_file, path): path
|
|
246
|
+
executor.submit(self.analyze_single_file, path): path
|
|
252
247
|
for path in file_paths
|
|
253
248
|
}
|
|
254
|
-
|
|
249
|
+
|
|
255
250
|
# 收集结果
|
|
256
251
|
if show_progress:
|
|
257
|
-
futures = tqdm(as_completed(future_to_path), total=len(file_paths),
|
|
258
|
-
|
|
252
|
+
futures = tqdm(as_completed(future_to_path), total=len(file_paths),
|
|
253
|
+
desc="分析音频文件")
|
|
259
254
|
else:
|
|
260
255
|
futures = as_completed(future_to_path)
|
|
261
|
-
|
|
256
|
+
|
|
262
257
|
for future in futures:
|
|
263
258
|
result = future.result()
|
|
264
259
|
if result is not None:
|
|
265
260
|
self.file_infos.append(result)
|
|
266
|
-
|
|
261
|
+
|
|
267
262
|
# 生成数据集摘要
|
|
268
263
|
self.dataset_summary = self._generate_dataset_summary()
|
|
269
264
|
self.analysis_complete = True
|
|
270
|
-
|
|
265
|
+
|
|
271
266
|
return self.dataset_summary
|
|
272
|
-
|
|
267
|
+
|
|
273
268
|
def _generate_dataset_summary(self) -> Dict[str, Any]:
|
|
274
269
|
"""
|
|
275
270
|
生成数据集摘要统计
|
|
276
|
-
|
|
271
|
+
|
|
277
272
|
Returns:
|
|
278
273
|
数据集摘要字典
|
|
279
274
|
"""
|
|
280
275
|
if not self.file_infos:
|
|
281
276
|
return {}
|
|
282
|
-
|
|
277
|
+
|
|
283
278
|
# 基础统计
|
|
284
279
|
total_files = len(self.file_infos)
|
|
285
280
|
total_duration = sum(info.duration for info in self.file_infos)
|
|
286
281
|
total_size = sum(info.file_size for info in self.file_infos)
|
|
287
|
-
|
|
282
|
+
|
|
288
283
|
# 格式统计
|
|
289
284
|
formats = {}
|
|
290
285
|
sample_rates = {}
|
|
291
286
|
channels_count = {}
|
|
292
|
-
|
|
287
|
+
|
|
293
288
|
for info in self.file_infos:
|
|
294
289
|
formats[info.format] = formats.get(info.format, 0) + 1
|
|
295
290
|
sample_rates[info.sample_rate] = sample_rates.get(info.sample_rate, 0) + 1
|
|
296
291
|
channels_count[info.channels] = channels_count.get(info.channels, 0) + 1
|
|
297
|
-
|
|
292
|
+
|
|
298
293
|
# 质量统计
|
|
299
294
|
health_scores = [info.health_score for info in self.file_infos]
|
|
300
295
|
problematic_files = [info for info in self.file_infos if info.health_score < 80]
|
|
301
296
|
silent_files = [info for info in self.file_infos if info.is_silent]
|
|
302
297
|
clipped_files = [info for info in self.file_infos if info.has_clipping]
|
|
303
|
-
|
|
298
|
+
|
|
304
299
|
# 音频特征统计
|
|
305
300
|
durations = [info.duration for info in self.file_infos]
|
|
306
301
|
rms_values = [info.rms_amplitude for info in self.file_infos]
|
|
307
302
|
dynamic_ranges = [info.dynamic_range for info in self.file_infos]
|
|
308
|
-
|
|
303
|
+
|
|
309
304
|
# 生成摘要
|
|
310
305
|
summary = {
|
|
311
306
|
'overview': {
|
|
@@ -315,13 +310,13 @@ class DatasetAnalyzer:
|
|
|
315
310
|
'average_file_duration': np.mean(durations),
|
|
316
311
|
'analysis_target_sr': self.sr
|
|
317
312
|
},
|
|
318
|
-
|
|
313
|
+
|
|
319
314
|
'format_distribution': {
|
|
320
315
|
'formats': formats,
|
|
321
316
|
'sample_rates': sample_rates,
|
|
322
317
|
'channels': channels_count
|
|
323
318
|
},
|
|
324
|
-
|
|
319
|
+
|
|
325
320
|
'duration_statistics': {
|
|
326
321
|
'mean': np.mean(durations),
|
|
327
322
|
'median': np.median(durations),
|
|
@@ -335,7 +330,7 @@ class DatasetAnalyzer:
|
|
|
335
330
|
'95th': np.percentile(durations, 95)
|
|
336
331
|
}
|
|
337
332
|
},
|
|
338
|
-
|
|
333
|
+
|
|
339
334
|
'quality_assessment': {
|
|
340
335
|
'average_health_score': np.mean(health_scores),
|
|
341
336
|
'problematic_files_count': len(problematic_files),
|
|
@@ -350,7 +345,7 @@ class DatasetAnalyzer:
|
|
|
350
345
|
'bad (<60)': len([s for s in health_scores if s < 60])
|
|
351
346
|
}
|
|
352
347
|
},
|
|
353
|
-
|
|
348
|
+
|
|
354
349
|
'audio_characteristics': {
|
|
355
350
|
'rms_statistics': {
|
|
356
351
|
'mean_linear': np.mean(rms_values),
|
|
@@ -367,91 +362,91 @@ class DatasetAnalyzer:
|
|
|
367
362
|
'max': np.max(dynamic_ranges)
|
|
368
363
|
}
|
|
369
364
|
},
|
|
370
|
-
|
|
365
|
+
|
|
371
366
|
'recommendations': self._generate_recommendations()
|
|
372
367
|
}
|
|
373
|
-
|
|
368
|
+
|
|
374
369
|
return summary
|
|
375
|
-
|
|
370
|
+
|
|
376
371
|
def _generate_recommendations(self) -> List[str]:
|
|
377
372
|
"""
|
|
378
373
|
基于分析结果生成改进建议
|
|
379
|
-
|
|
374
|
+
|
|
380
375
|
Returns:
|
|
381
376
|
建议列表
|
|
382
377
|
"""
|
|
383
378
|
recommendations = []
|
|
384
|
-
|
|
379
|
+
|
|
385
380
|
if not self.file_infos:
|
|
386
381
|
return recommendations
|
|
387
|
-
|
|
382
|
+
|
|
388
383
|
# 检查质量问题
|
|
389
384
|
problematic_count = len([info for info in self.file_infos if info.health_score < 80])
|
|
390
385
|
if problematic_count > 0:
|
|
391
386
|
recommendations.append(f"发现 {problematic_count} 个文件存在质量问题, 建议进行质量检查和修复")
|
|
392
|
-
|
|
387
|
+
|
|
393
388
|
# 检查削波
|
|
394
389
|
clipped_count = len([info for info in self.file_infos if info.has_clipping])
|
|
395
390
|
if clipped_count > 0:
|
|
396
391
|
recommendations.append(f"发现 {clipped_count} 个文件存在削波, 建议重新录制或降低增益")
|
|
397
|
-
|
|
392
|
+
|
|
398
393
|
# 检查静音文件
|
|
399
394
|
silent_count = len([info for info in self.file_infos if info.is_silent])
|
|
400
395
|
if silent_count > 0:
|
|
401
396
|
recommendations.append(f"发现 {silent_count} 个静音文件, 建议移除或重新录制")
|
|
402
|
-
|
|
397
|
+
|
|
403
398
|
# 检查采样率一致性
|
|
404
399
|
sample_rates = set(info.sample_rate for info in self.file_infos)
|
|
405
400
|
if len(sample_rates) > 1:
|
|
406
401
|
recommendations.append(f"数据集包含多种采样率 {sample_rates}, 建议统一采样率")
|
|
407
|
-
|
|
402
|
+
|
|
408
403
|
# 检查动态范围
|
|
409
404
|
low_dr_count = len([info for info in self.file_infos if info.dynamic_range < 20])
|
|
410
405
|
if low_dr_count > len(self.file_infos) * 0.2: # 超过20%的文件动态范围过低
|
|
411
406
|
recommendations.append("大量文件动态范围过低, 可能影响音频质量")
|
|
412
|
-
|
|
407
|
+
|
|
413
408
|
# 检查时长分布
|
|
414
409
|
durations = [info.duration for info in self.file_infos]
|
|
415
410
|
duration_std = np.std(durations)
|
|
416
411
|
duration_mean = np.mean(durations)
|
|
417
412
|
if duration_std / duration_mean > 0.5: # 变异系数大于0.5
|
|
418
413
|
recommendations.append("文件时长分布不均匀, 可能影响训练效果")
|
|
419
|
-
|
|
414
|
+
|
|
420
415
|
return recommendations
|
|
421
|
-
|
|
416
|
+
|
|
422
417
|
def get_problematic_files(self, min_health_score: float = 80) -> List[AudioFileInfo]:
|
|
423
418
|
"""
|
|
424
419
|
获取有问题的文件列表
|
|
425
|
-
|
|
420
|
+
|
|
426
421
|
Args:
|
|
427
422
|
min_health_score: 最低健康分数阈值
|
|
428
|
-
|
|
423
|
+
|
|
429
424
|
Returns:
|
|
430
425
|
问题文件列表
|
|
431
426
|
"""
|
|
432
427
|
return [info for info in self.file_infos if info.health_score < min_health_score]
|
|
433
|
-
|
|
428
|
+
|
|
434
429
|
def export_results(self, output_dir: str):
|
|
435
430
|
"""
|
|
436
431
|
导出分析结果
|
|
437
|
-
|
|
432
|
+
|
|
438
433
|
Args:
|
|
439
434
|
output_dir: 输出目录
|
|
440
435
|
"""
|
|
441
436
|
output_path = Path(output_dir)
|
|
442
437
|
output_path.mkdir(parents=True, exist_ok=True)
|
|
443
|
-
|
|
438
|
+
|
|
444
439
|
# 导出摘要
|
|
445
440
|
summary_path = output_path / 'dataset_summary.json'
|
|
446
441
|
with open(summary_path, 'w', encoding='utf-8') as f:
|
|
447
442
|
json.dump(self.dataset_summary, f, indent=2, ensure_ascii=False, default=str)
|
|
448
|
-
|
|
443
|
+
|
|
449
444
|
# 导出详细文件信息
|
|
450
445
|
details_path = output_path / 'file_details.json'
|
|
451
446
|
file_details = [asdict(info) for info in self.file_infos]
|
|
452
447
|
with open(details_path, 'w', encoding='utf-8') as f:
|
|
453
448
|
json.dump(file_details, f, indent=2, ensure_ascii=False, default=str)
|
|
454
|
-
|
|
449
|
+
|
|
455
450
|
# 导出问题文件列表
|
|
456
451
|
problematic_files = self.get_problematic_files()
|
|
457
452
|
if problematic_files:
|
|
@@ -459,35 +454,35 @@ class DatasetAnalyzer:
|
|
|
459
454
|
problems_data = [asdict(info) for info in problematic_files]
|
|
460
455
|
with open(problems_path, 'w', encoding='utf-8') as f:
|
|
461
456
|
json.dump(problems_data, f, indent=2, ensure_ascii=False, default=str)
|
|
462
|
-
|
|
457
|
+
|
|
463
458
|
print(f"分析结果已导出到: {output_path}")
|
|
464
|
-
|
|
459
|
+
|
|
465
460
|
def create_analysis_report(self, output_path: str):
|
|
466
461
|
"""
|
|
467
462
|
创建HTML分析报告
|
|
468
|
-
|
|
463
|
+
|
|
469
464
|
Args:
|
|
470
465
|
output_path: 输出HTML文件路径
|
|
471
466
|
"""
|
|
472
467
|
if not self.analysis_complete:
|
|
473
468
|
raise ValueError("请先完成数据集分析")
|
|
474
|
-
|
|
469
|
+
|
|
475
470
|
html_content = self._generate_html_report()
|
|
476
|
-
|
|
471
|
+
|
|
477
472
|
with open(output_path, 'w', encoding='utf-8') as f:
|
|
478
473
|
f.write(html_content)
|
|
479
|
-
|
|
474
|
+
|
|
480
475
|
print(f"HTML报告已生成: {output_path}")
|
|
481
|
-
|
|
476
|
+
|
|
482
477
|
def _generate_html_report(self) -> str:
|
|
483
478
|
"""
|
|
484
479
|
生成HTML格式的分析报告
|
|
485
|
-
|
|
480
|
+
|
|
486
481
|
Returns:
|
|
487
482
|
HTML内容字符串
|
|
488
483
|
"""
|
|
489
484
|
summary = self.dataset_summary
|
|
490
|
-
|
|
485
|
+
|
|
491
486
|
html = f"""
|
|
492
487
|
<!DOCTYPE html>
|
|
493
488
|
<html lang="zh-CN">
|
|
@@ -511,7 +506,7 @@ class DatasetAnalyzer:
|
|
|
511
506
|
<h1>音频数据集分析报告</h1>
|
|
512
507
|
<p>生成时间: {pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S') if 'pd' in globals() else 'N/A'}</p>
|
|
513
508
|
</div>
|
|
514
|
-
|
|
509
|
+
|
|
515
510
|
<div class="section">
|
|
516
511
|
<h2>数据集概览</h2>
|
|
517
512
|
<div class="metric">文件总数: {summary['overview']['total_files']}</div>
|
|
@@ -519,7 +514,7 @@ class DatasetAnalyzer:
|
|
|
519
514
|
<div class="metric">总大小: {summary['overview']['total_size_mb']:.2f} MB</div>
|
|
520
515
|
<div class="metric">平均文件时长: {summary['overview']['average_file_duration']:.2f} 秒</div>
|
|
521
516
|
</div>
|
|
522
|
-
|
|
517
|
+
|
|
523
518
|
<div class="section">
|
|
524
519
|
<h2>质量评估</h2>
|
|
525
520
|
<div class="metric">平均健康分数: {summary['quality_assessment']['average_health_score']:.1f}/100</div>
|
|
@@ -528,63 +523,63 @@ class DatasetAnalyzer:
|
|
|
528
523
|
<div class="metric">静音文件: {summary['quality_assessment']['silent_files_count']}</div>
|
|
529
524
|
<div class="metric">削波文件: {summary['quality_assessment']['clipped_files_count']}</div>
|
|
530
525
|
</div>
|
|
531
|
-
|
|
526
|
+
|
|
532
527
|
<div class="section">
|
|
533
528
|
<h2>改进建议</h2>
|
|
534
529
|
"""
|
|
535
|
-
|
|
530
|
+
|
|
536
531
|
for rec in summary['recommendations']:
|
|
537
532
|
html += f'<div class="recommendation">• {rec}</div>'
|
|
538
|
-
|
|
533
|
+
|
|
539
534
|
html += """
|
|
540
535
|
</div>
|
|
541
536
|
</body>
|
|
542
537
|
</html>
|
|
543
538
|
"""
|
|
544
|
-
|
|
539
|
+
|
|
545
540
|
return html
|
|
546
541
|
|
|
547
542
|
|
|
548
|
-
def analyze_audio_dataset(directory: str, output_dir: str = None,
|
|
549
|
-
|
|
550
|
-
|
|
543
|
+
def analyze_audio_dataset(directory: str, output_dir: str = None,
|
|
544
|
+
extensions: List[str] = None, sr: int = 22050,
|
|
545
|
+
n_jobs: int = None) -> Dict[str, Any]:
|
|
551
546
|
"""
|
|
552
547
|
快速分析音频数据集
|
|
553
|
-
|
|
548
|
+
|
|
554
549
|
Args:
|
|
555
550
|
directory: 音频文件目录
|
|
556
551
|
output_dir: 输出目录(可选)
|
|
557
552
|
extensions: 支持的文件扩展名
|
|
558
553
|
sr: 目标采样率
|
|
559
554
|
n_jobs: 并行作业数
|
|
560
|
-
|
|
555
|
+
|
|
561
556
|
Returns:
|
|
562
557
|
分析结果摘要
|
|
563
558
|
"""
|
|
564
559
|
if extensions is None:
|
|
565
560
|
extensions = ['.wav', '.mp3', '.flac', '.m4a', '.aac']
|
|
566
|
-
|
|
561
|
+
|
|
567
562
|
# 收集文件
|
|
568
563
|
directory_path = Path(directory)
|
|
569
564
|
file_paths = []
|
|
570
565
|
for ext in extensions:
|
|
571
566
|
file_paths.extend(list(directory_path.glob(f'**/*{ext}')))
|
|
572
|
-
|
|
567
|
+
|
|
573
568
|
file_paths = [str(p) for p in file_paths]
|
|
574
|
-
|
|
569
|
+
|
|
575
570
|
if not file_paths:
|
|
576
571
|
raise ValueError(f"在目录 {directory} 中未找到音频文件")
|
|
577
|
-
|
|
572
|
+
|
|
578
573
|
# 分析数据集
|
|
579
574
|
analyzer = DatasetAnalyzer(sr=sr, n_jobs=n_jobs)
|
|
580
575
|
results = analyzer.analyze_dataset(file_paths)
|
|
581
|
-
|
|
576
|
+
|
|
582
577
|
# 导出结果
|
|
583
578
|
if output_dir:
|
|
584
579
|
analyzer.export_results(output_dir)
|
|
585
|
-
|
|
580
|
+
|
|
586
581
|
# 生成HTML报告
|
|
587
582
|
html_path = Path(output_dir) / 'analysis_report.html'
|
|
588
583
|
analyzer.create_analysis_report(str(html_path))
|
|
589
|
-
|
|
590
|
-
return results
|
|
584
|
+
|
|
585
|
+
return results
|