neverlib 0.2.2__py3-none-any.whl → 0.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- neverlib/.history/Docs/audio_aug/test_snr_20250806011311.py +0 -0
- neverlib/.history/Docs/audio_aug/test_snr_20250806011331.py +75 -0
- neverlib/.history/Docs/audio_aug/test_snr_20250806011342.py +57 -0
- neverlib/.history/Docs/audio_aug/test_snr_20250806011352.py +57 -0
- neverlib/.history/Docs/audio_aug/test_snr_20250806011403.py +57 -0
- neverlib/.history/Docs/audio_aug/test_snr_20250806011413.py +57 -0
- neverlib/.history/Docs/audio_aug/test_snr_20250806011435.py +55 -0
- neverlib/.history/Docs/vad/1_20250810032405.py +0 -0
- neverlib/.history/Docs/vad/1_20250810032417.py +39 -0
- neverlib/.history/audio_aug/audio_aug_20250806010451.py +125 -0
- neverlib/.history/audio_aug/audio_aug_20250806010750.py +138 -0
- neverlib/.history/audio_aug/audio_aug_20250806010759.py +140 -0
- neverlib/.history/audio_aug/audio_aug_20250806010803.py +140 -0
- neverlib/.history/audio_aug/audio_aug_20250806010809.py +140 -0
- neverlib/.history/audio_aug/audio_aug_20250806011108.py +140 -0
- neverlib/.history/dataAnalyze/__init___20250805234204.py +87 -0
- neverlib/.history/dataAnalyze/__init___20250806204125.py +14 -0
- neverlib/.history/dataAnalyze/__init___20250806204139.py +14 -0
- neverlib/.history/dataAnalyze/__init___20250806204159.py +14 -0
- neverlib/.history/filter/__init___20250820103351.py +70 -0
- neverlib/.history/filter/__init___20250821102348.py +70 -0
- neverlib/.history/filter/__init___20250821102405.py +14 -0
- neverlib/.history/filter/auto_eq/__init___20250819213121.py +36 -0
- neverlib/.history/filter/auto_eq/__init___20250821102241.py +36 -0
- neverlib/.history/filter/auto_eq/__init___20250821102259.py +36 -0
- neverlib/.history/filter/auto_eq/__init___20250821102307.py +36 -0
- neverlib/.history/filter/auto_eq/__init___20250821102310.py +36 -0
- neverlib/.history/filter/auto_eq/__init___20250821102318.py +36 -0
- neverlib/.history/filter/auto_eq/__init___20250821102507.py +36 -0
- neverlib/.history/filter/auto_eq/de_eq_20250820103848.py +361 -0
- neverlib/.history/filter/auto_eq/de_eq_20250821102422.py +360 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250805234206.py +75 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820140732.py +75 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820140745.py +75 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820140816.py +75 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820140938.py +77 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820141003.py +77 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820141006.py +77 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820141019.py +77 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820141049.py +77 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820141211.py +77 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820141227.py +77 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820141311.py +78 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820141340.py +78 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820141712.py +78 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820141733.py +78 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820141755.py +78 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250821102434.py +76 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250821102500.py +76 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250821102502.py +76 -0
- neverlib/.history/filter/auto_eq/ga_eq_basic_20250820102957.py +380 -0
- neverlib/.history/filter/auto_eq/ga_eq_basic_20250820113054.py +380 -0
- neverlib/.history/filter/auto_eq/ga_eq_basic_20250820113150.py +380 -0
- neverlib/.history/filter/auto_eq/ga_eq_basic_20250820113520.py +385 -0
- neverlib/.history/filter/auto_eq/ga_eq_basic_20250820113525.py +385 -0
- neverlib/.history/filter/auto_eq/ga_eq_basic_20250821102212.py +385 -0
- neverlib/.history/metrics/dnsmos_20250806001612.py +160 -0
- neverlib/.history/metrics/dnsmos_20250815180659.py +160 -0
- neverlib/.history/metrics/dnsmos_20250815180701.py +158 -0
- neverlib/.history/metrics/dnsmos_20250815181321.py +154 -0
- neverlib/.history/metrics/dnsmos_20250815181327.py +154 -0
- neverlib/.history/metrics/dnsmos_20250815181331.py +154 -0
- neverlib/.history/metrics/dnsmos_20250815181620.py +154 -0
- neverlib/.history/metrics/dnsmos_20250815181631.py +154 -0
- neverlib/.history/metrics/dnsmos_20250815181742.py +154 -0
- neverlib/.history/metrics/dnsmos_20250815181824.py +153 -0
- neverlib/.history/metrics/dnsmos_20250815181834.py +153 -0
- neverlib/.history/metrics/dnsmos_20250815181922.py +153 -0
- neverlib/.history/metrics/dnsmos_20250815182011.py +147 -0
- neverlib/.history/metrics/dnsmos_20250815182036.py +144 -0
- neverlib/.history/metrics/dnsmos_20250815182936.py +143 -0
- neverlib/.history/metrics/dnsmos_20250815182942.py +143 -0
- neverlib/.history/metrics/dnsmos_20250815183032.py +137 -0
- neverlib/.history/metrics/dnsmos_20250815183101.py +144 -0
- neverlib/.history/metrics/dnsmos_20250815183121.py +144 -0
- neverlib/.history/metrics/dnsmos_20250815183123.py +143 -0
- neverlib/.history/metrics/dnsmos_20250815183214.py +143 -0
- neverlib/.history/metrics/dnsmos_20250815183240.py +143 -0
- neverlib/.history/metrics/dnsmos_20250815183248.py +144 -0
- neverlib/.history/metrics/dnsmos_20250815183407.py +142 -0
- neverlib/.history/metrics/dnsmos_20250815183409.py +142 -0
- neverlib/.history/metrics/dnsmos_20250815183431.py +142 -0
- neverlib/.history/metrics/dnsmos_20250815183507.py +140 -0
- neverlib/.history/metrics/dnsmos_20250815183513.py +139 -0
- neverlib/.history/metrics/dnsmos_20250815183618.py +139 -0
- neverlib/.history/metrics/dnsmos_20250815183709.py +140 -0
- neverlib/.history/metrics/dnsmos_20250815183756.py +137 -0
- neverlib/.history/metrics/dnsmos_20250815183815.py +128 -0
- neverlib/.history/metrics/dnsmos_20250815183827.py +129 -0
- neverlib/.history/metrics/dnsmos_20250815183913.py +117 -0
- neverlib/.history/metrics/dnsmos_20250815183914.py +117 -0
- neverlib/.history/metrics/dnsmos_20250815184003.py +118 -0
- neverlib/.history/metrics/dnsmos_20250815184040.py +118 -0
- neverlib/.history/metrics/dnsmos_20250815184049.py +118 -0
- neverlib/.history/metrics/dnsmos_20250815184104.py +117 -0
- neverlib/.history/metrics/dnsmos_20250815184200.py +117 -0
- neverlib/.history/metrics/lpc_lsp_metric_20250816015944.py +128 -0
- neverlib/.history/metrics/lpc_lsp_metric_20250816020142.py +128 -0
- neverlib/.history/metrics/lpc_lsp_metric_20250816020156.py +128 -0
- neverlib/.history/metrics/lpc_lsp_metric_20250816020554.py +130 -0
- neverlib/.history/metrics/lpc_lsp_metric_20250816020600.py +125 -0
- neverlib/.history/metrics/lpc_lsp_metric_20250816020631.py +120 -0
- neverlib/.history/metrics/lpc_lsp_metric_20250816020746.py +118 -0
- neverlib/.history/metrics/lpc_me_20250816013111.py +0 -0
- neverlib/.history/metrics/lpc_me_20250816013129.py +121 -0
- neverlib/.history/metrics/lpc_me_20250816015430.py +103 -0
- neverlib/.history/metrics/lpc_me_20250816015535.py +96 -0
- neverlib/.history/metrics/lpc_me_20250816015542.py +96 -0
- neverlib/.history/metrics/lpc_me_20250816015636.py +97 -0
- neverlib/.history/metrics/lpc_me_20250816015658.py +104 -0
- neverlib/.history/metrics/lpc_me_20250816015703.py +100 -0
- neverlib/.history/metrics/lpc_me_20250816015945.py +128 -0
- neverlib/.history/metrics/snr_20250806010538.py +177 -0
- neverlib/.history/metrics/snr_20250806211634.py +184 -0
- neverlib/.history/metrics/spec_20250805234209.py +45 -0
- neverlib/.history/metrics/spec_20250816135530.py +11 -0
- neverlib/.history/metrics/spec_20250816135654.py +16 -0
- neverlib/.history/metrics/spec_20250816135736.py +68 -0
- neverlib/.history/metrics/spec_20250816135904.py +75 -0
- neverlib/.history/metrics/spec_20250816135921.py +82 -0
- neverlib/.history/metrics/spec_20250816140111.py +82 -0
- neverlib/.history/metrics/spec_20250816140543.py +136 -0
- neverlib/.history/metrics/spec_20250816140559.py +172 -0
- neverlib/.history/metrics/spec_20250816140602.py +172 -0
- neverlib/.history/metrics/spec_20250816140608.py +172 -0
- neverlib/.history/metrics/spec_20250816140654.py +148 -0
- neverlib/.history/metrics/spec_20250816140705.py +144 -0
- neverlib/.history/metrics/spec_20250816140755.py +138 -0
- neverlib/.history/metrics/spec_20250816140823.py +170 -0
- neverlib/.history/metrics/spec_20250816140832.py +170 -0
- neverlib/.history/metrics/spec_20250816140833.py +170 -0
- neverlib/.history/metrics/spec_20250816140922.py +147 -0
- neverlib/.history/metrics/spec_20250816141148.py +107 -0
- neverlib/.history/metrics/spec_20250816141219.py +123 -0
- neverlib/.history/metrics/spec_20250816141732.py +178 -0
- neverlib/.history/metrics/spec_20250816141740.py +178 -0
- neverlib/.history/metrics/spec_20250816142030.py +178 -0
- neverlib/.history/metrics/spec_20250816142107.py +135 -0
- neverlib/.history/metrics/spec_20250816142126.py +135 -0
- neverlib/.history/metrics/spec_20250816142410.py +135 -0
- neverlib/.history/metrics/spec_20250816142415.py +136 -0
- neverlib/.history/metrics/spec_metric_20250816135156.py +0 -0
- neverlib/.history/metrics/spec_metric_20250816135226.py +5 -0
- neverlib/.history/metrics/spec_metric_20250816135227.py +10 -0
- neverlib/.history/metrics/spec_metric_20250816135306.py +15 -0
- neverlib/.history/metrics/spec_metric_20250816135442.py +31 -0
- neverlib/.history/metrics/spec_metric_20250816135448.py +31 -0
- neverlib/.history/metrics/spec_metric_20250816135520.py +29 -0
- neverlib/.history/metrics/spec_metric_20250816135537.py +63 -0
- neverlib/.history/metrics/spec_metric_20250816135653.py +65 -0
- neverlib/.history/vad/PreProcess_20250805234211.py +63 -0
- neverlib/.history/vad/PreProcess_20250809232455.py +63 -0
- neverlib/.history/vad/PreProcess_20250816020725.py +66 -0
- neverlib/.history/vad/VAD_Silero_20250805234211.py +50 -0
- neverlib/.history/vad/VAD_Silero_20250809232456.py +50 -0
- neverlib/.history/vad/VAD_WebRTC_20250805234211.py +61 -0
- neverlib/.history/vad/VAD_WebRTC_20250809232456.py +61 -0
- neverlib/.history/vad/VAD_funasr_20250805234211.py +54 -0
- neverlib/.history/vad/VAD_funasr_20250809232456.py +54 -0
- neverlib/.history/vad/VAD_vadlib_20250805234211.py +70 -0
- neverlib/.history/vad/VAD_vadlib_20250809232455.py +70 -0
- neverlib/.history/vad/VAD_whisper_20250805234211.py +55 -0
- neverlib/.history/vad/VAD_whisper_20250809232456.py +55 -0
- neverlib/.specstory/.what-is-this.md +69 -0
- neverlib/.specstory/history/2025-08-05_17-06Z-/350/277/231/344/270/200/346/255/245/347/232/204/347/233/256/347/232/204/346/230/257/344/273/200/344/271/210.md +424 -0
- neverlib/Docs/audio_aug/test_snr.py +55 -0
- neverlib/__init__.py +2 -2
- neverlib/audio_aug/HarmonicDistortion.py +79 -0
- neverlib/audio_aug/TFDrop.py +41 -0
- neverlib/audio_aug/TFMask.py +56 -0
- neverlib/audio_aug/__init__.py +1 -1
- neverlib/audio_aug/audio_aug.py +19 -5
- neverlib/audio_aug/clip_aug.py +41 -0
- neverlib/audio_aug/coder_aug.py +209 -0
- neverlib/audio_aug/coder_aug2.py +118 -0
- neverlib/audio_aug/loss_packet_aug.py +103 -0
- neverlib/audio_aug/quant_aug.py +78 -0
- neverlib/data_analyze/README.md +234 -0
- neverlib/data_analyze/__init__.py +14 -0
- neverlib/data_analyze/dataset_analyzer.py +590 -0
- neverlib/data_analyze/quality_metrics.py +364 -0
- neverlib/data_analyze/rms_distrubution.py +62 -0
- neverlib/data_analyze/spectral_analysis.py +218 -0
- neverlib/data_analyze/statistics.py +406 -0
- neverlib/data_analyze/temporal_features.py +126 -0
- neverlib/data_analyze/visualization.py +468 -0
- neverlib/filter/README.md +101 -0
- neverlib/filter/__init__.py +7 -0
- neverlib/filter/auto_eq/README.md +165 -0
- neverlib/filter/auto_eq/__init__.py +36 -0
- neverlib/filter/auto_eq/de_eq.py +360 -0
- neverlib/filter/auto_eq/freq_eq.py +76 -0
- neverlib/filter/auto_eq/ga_eq_advanced.py +577 -0
- neverlib/filter/auto_eq/ga_eq_basic.py +385 -0
- neverlib/filter/biquad.py +45 -0
- neverlib/filter/common.py +5 -6
- neverlib/filter/core.py +339 -0
- neverlib/metrics/dnsmos.py +117 -0
- neverlib/metrics/lpc_lsp.py +118 -0
- neverlib/metrics/snr.py +184 -0
- neverlib/metrics/spec.py +136 -0
- neverlib/metrics/test_pesq.py +35 -0
- neverlib/metrics/time.py +68 -0
- neverlib/tests/test_vad.py +21 -0
- neverlib/utils/audio_split.py +2 -1
- neverlib/utils/message.py +4 -4
- neverlib/utils/utils.py +36 -16
- neverlib/vad/PreProcess.py +6 -3
- neverlib/vad/README.md +10 -10
- neverlib/vad/VAD_Energy.py +1 -1
- neverlib/vad/VAD_Silero.py +2 -2
- neverlib/vad/VAD_WebRTC.py +2 -2
- neverlib/vad/VAD_funasr.py +2 -2
- neverlib/vad/VAD_statistics.py +3 -3
- neverlib/vad/VAD_vadlib.py +3 -3
- neverlib/vad/VAD_whisper.py +2 -2
- neverlib/vad/__init__.py +1 -1
- neverlib/vad/class_get_speech.py +4 -4
- neverlib/vad/class_vad.py +1 -1
- neverlib/vad/utils.py +47 -5
- {neverlib-0.2.2.dist-info → neverlib-0.2.4.dist-info}/METADATA +120 -120
- neverlib-0.2.4.dist-info/RECORD +229 -0
- {neverlib-0.2.2.dist-info → neverlib-0.2.4.dist-info}/WHEEL +1 -1
- neverlib/Documents/vad/VAD_Energy.ipynb +0 -159
- neverlib/Documents/vad/VAD_Silero.ipynb +0 -305
- neverlib/Documents/vad/VAD_WebRTC.ipynb +0 -183
- neverlib/Documents/vad/VAD_funasr.ipynb +0 -179
- neverlib/Documents/vad/VAD_ppasr.ipynb +0 -175
- neverlib/Documents/vad/VAD_statistics.ipynb +0 -522
- neverlib/Documents/vad/VAD_vadlib.ipynb +0 -184
- neverlib/Documents/vad/VAD_whisper.ipynb +0 -430
- neverlib/utils/waveform_analyzer.py +0 -51
- neverlib/wav_data/000_short.wav +0 -0
- neverlib-0.2.2.dist-info/RECORD +0 -40
- {neverlib-0.2.2.dist-info → neverlib-0.2.4.dist-info}/licenses/LICENSE +0 -0
- {neverlib-0.2.2.dist-info → neverlib-0.2.4.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,406 @@
|
|
|
1
|
+
"""
|
|
2
|
+
统计分析工具模块
|
|
3
|
+
Statistics Analysis Module
|
|
4
|
+
|
|
5
|
+
提供音频数据集统计分析功能
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
import librosa
|
|
10
|
+
import os
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import List, Dict, Tuple, Optional, Union
|
|
13
|
+
import matplotlib.pyplot as plt
|
|
14
|
+
from collections import defaultdict
|
|
15
|
+
import json
|
|
16
|
+
from .utils import rms_amplitude, dB
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class AudioStatistics:
    """Collect audio clips in memory and compute dataset-level statistics.

    Clips and their paths are stored in two parallel lists (``audio_data`` and
    ``file_paths``); every ``compute_*`` method derives its numbers from them.

    NOTE(review): ``rms_amplitude`` and ``dB`` are module-level names imported
    from ``.utils``. The code below uses ``rms_amplitude(audio)`` as if it
    returned one scalar per clip (it is compared against a threshold in
    ``detect_outliers``) -- confirm against that helper.
    """

    def __init__(self, sr: int = 22050):
        """Create an empty analyzer.

        Args:
            sr: Sample rate used when loading files and converting sample
                counts to seconds.
        """
        self.sr = sr
        self.audio_data = []
        self.file_paths = []
        self.statistics = {}

    def add_audio_file(self, file_path: str, audio_data: Optional[np.ndarray] = None):
        """Register one clip for analysis.

        Args:
            file_path: Path of the audio file (also used as its label).
            audio_data: Already-decoded samples. When omitted, the file is
                loaded with ``librosa.load`` at ``self.sr``.
        """
        if audio_data is None:
            try:
                audio_data, _ = librosa.load(file_path, sr=self.sr)
            except Exception as e:
                # Deliberate best-effort: report the unreadable file and keep
                # going so one bad file does not abort a whole directory scan.
                print(f"Error loading {file_path}: {e}")
                return

        self.audio_data.append(audio_data)
        self.file_paths.append(file_path)

    def add_audio_directory(self, directory: str, extensions: Optional[List[str]] = None):
        """Register every matching file directly inside ``directory``.

        Args:
            directory: Directory to scan (not recursive).
            extensions: File suffixes to accept, including the leading dot.
                Defaults to wav/mp3/flac/m4a/aac.
        """
        if extensions is None:
            extensions = ['.wav', '.mp3', '.flac', '.m4a', '.aac']

        directory = Path(directory)
        for ext in extensions:
            for file_path in directory.glob(f'*{ext}'):
                self.add_audio_file(str(file_path))

    @staticmethod
    def _summarize(values) -> Dict:
        """Return mean/std/min/max of ``values`` as a dict."""
        arr = np.asarray(values)
        return {
            'mean': np.mean(arr),
            'std': np.std(arr),
            'min': np.min(arr),
            'max': np.max(arr),
        }

    def compute_duration_statistics(self) -> Dict:
        """Summarise clip durations in seconds.

        Returns:
            Count, total, mean, median, std, min, max and the 25/75/90/95th
            percentiles of the durations; ``{}`` when no clip is registered.
        """
        durations = [len(audio) / self.sr for audio in self.audio_data]
        if not durations:
            return {}

        return {
            'count': len(durations),
            'total_duration': sum(durations),
            'mean_duration': np.mean(durations),
            'median_duration': np.median(durations),
            'std_duration': np.std(durations),
            'min_duration': np.min(durations),
            'max_duration': np.max(durations),
            'percentiles': {
                '25th': np.percentile(durations, 25),
                '75th': np.percentile(durations, 75),
                '90th': np.percentile(durations, 90),
                '95th': np.percentile(durations, 95),
            },
        }

    def compute_amplitude_statistics(self) -> Dict:
        """Summarise sample amplitudes, per-clip peaks and per-clip RMS.

        Returns:
            A dict with ``overall`` (statistics over the absolute value of
            every sample), ``peak_amplitudes`` and ``rms_values`` sections;
            ``{}`` when no clip is registered.
        """
        if not self.audio_data:
            return {}

        # Pool the samples as one float64 array instead of a Python list of
        # floats: same values, far less memory on large datasets.
        all_amplitudes = np.concatenate([
            np.abs(np.asarray(audio, dtype=np.float64)).ravel()
            for audio in self.audio_data
        ])
        max_amplitudes = [np.max(np.abs(audio)) for audio in self.audio_data]
        rms_values = [rms_amplitude(audio) for audio in self.audio_data]

        overall = self._summarize(all_amplitudes)
        overall['percentiles'] = {
            '50th': np.percentile(all_amplitudes, 50),
            '90th': np.percentile(all_amplitudes, 90),
            '95th': np.percentile(all_amplitudes, 95),
            '99th': np.percentile(all_amplitudes, 99),
        }

        rms_summary = self._summarize(rms_values)
        rms_summary['mean_db'] = dB(np.mean(rms_values))
        rms_summary['std_db'] = np.std([dB(rms) for rms in rms_values])

        return {
            'overall': overall,
            'peak_amplitudes': self._summarize(max_amplitudes),
            'rms_values': rms_summary,
        }

    def compute_frequency_statistics(self) -> Dict:
        """Summarise librosa spectral centroid, bandwidth and roll-off.

        The per-frame feature values of all clips are pooled before the
        mean/std/min/max are taken.

        Returns:
            A dict with ``spectral_centroid``, ``spectral_bandwidth`` and
            ``spectral_rolloff`` sections; ``{}`` when there is nothing to
            analyse.
        """
        spectral_centroids = []
        spectral_bandwidths = []
        spectral_rolloffs = []

        for audio in self.audio_data:
            # Each feature comes back as (1, n_frames); keep the single row.
            centroid = librosa.feature.spectral_centroid(y=audio, sr=self.sr)[0]
            bandwidth = librosa.feature.spectral_bandwidth(y=audio, sr=self.sr)[0]
            rolloff = librosa.feature.spectral_rolloff(y=audio, sr=self.sr)[0]

            spectral_centroids.extend(centroid.tolist())
            spectral_bandwidths.extend(bandwidth.tolist())
            spectral_rolloffs.extend(rolloff.tolist())

        if not spectral_centroids:
            return {}

        return {
            'spectral_centroid': self._summarize(spectral_centroids),
            'spectral_bandwidth': self._summarize(spectral_bandwidths),
            'spectral_rolloff': self._summarize(spectral_rolloffs),
        }

    def detect_outliers(self, feature: str = 'duration', threshold: float = 2.0) -> List[Tuple[str, float]]:
        """Flag clips whose feature value is far from the dataset mean.

        Args:
            feature: One of ``'duration'``, ``'max_amplitude'`` or ``'rms'``.
            threshold: Absolute z-score above which a clip is reported.

        Returns:
            ``[(file_path, feature_value), ...]`` for the flagged clips.

        Raises:
            ValueError: If ``feature`` is not one of the supported names.
        """
        if feature == 'duration':
            values = [len(audio) / self.sr for audio in self.audio_data]
        elif feature == 'max_amplitude':
            values = [np.max(np.abs(audio)) for audio in self.audio_data]
        elif feature == 'rms':
            values = [rms_amplitude(audio) for audio in self.audio_data]
        else:
            raise ValueError(f"Unknown feature: {feature}")

        values = np.array(values)
        if values.size == 0:
            # Nothing registered: avoid mean/std of an empty array.
            return []

        mean_val = np.mean(values)
        std_val = np.std(values)

        # The small epsilon keeps the division finite when all values are equal.
        return [
            (path, val)
            for path, val in zip(self.file_paths, values)
            if abs(val - mean_val) / (std_val + 1e-10) > threshold
        ]

    def generate_distribution_analysis(self) -> Dict:
        """Describe the shape of the duration, peak and RMS distributions.

        Returns:
            A dict with ``duration_distribution``, ``amplitude_distribution``
            and ``rms_distribution`` entries (see ``_analyze_distribution``).
        """
        return {
            'duration_distribution': self._analyze_distribution([len(audio) / self.sr for audio in self.audio_data]),
            'amplitude_distribution': self._analyze_distribution([np.max(np.abs(audio)) for audio in self.audio_data]),
            'rms_distribution': self._analyze_distribution([rms_amplitude(audio) for audio in self.audio_data]),
        }

    def _analyze_distribution(self, values: List[float]) -> Dict:
        """Compute mean, std, skewness, excess kurtosis and a coarse label.

        Args:
            values: Numeric samples of one feature.

        Returns:
            ``{}`` for empty input, otherwise a dict with ``mean``, ``std``,
            ``skewness``, ``kurtosis`` and ``distribution_type``.
        """
        # len() instead of truthiness so a NumPy array is accepted as well.
        if len(values) == 0:
            return {}

        values = np.array(values)
        mean_val = np.mean(values)
        std_val = np.std(values)

        # Standardise once; epsilon guards against a zero standard deviation.
        standardized = (values - mean_val) / (std_val + 1e-10)
        skewness = np.mean(standardized ** 3)
        kurtosis = np.mean(standardized ** 4) - 3  # excess kurtosis

        return {
            'mean': mean_val,
            'std': std_val,
            'skewness': skewness,
            'kurtosis': kurtosis,
            'distribution_type': self._classify_distribution(skewness, kurtosis),
        }

    def _classify_distribution(self, skewness: float, kurtosis: float) -> str:
        """Map skewness / excess kurtosis to a coarse label.

        Skewness is checked before kurtosis, so a skewed heavy-tailed sample
        is reported as skewed.

        Args:
            skewness: Sample skewness.
            kurtosis: Sample excess kurtosis.

        Returns:
            One of ``approximately_normal``, ``right_skewed``,
            ``left_skewed``, ``heavy_tailed``, ``light_tailed`` or
            ``unknown``.
        """
        if abs(skewness) < 0.5 and abs(kurtosis) < 0.5:
            return "approximately_normal"
        elif skewness > 0.5:
            return "right_skewed"
        elif skewness < -0.5:
            return "left_skewed"
        elif kurtosis > 0.5:
            return "heavy_tailed"
        elif kurtosis < -0.5:
            return "light_tailed"
        else:
            return "unknown"

    def compute_all_statistics(self) -> Dict:
        """Run every analysis, store the result in ``self.statistics``.

        Returns:
            The full report (also kept on the instance for
            ``export_statistics``).
        """
        self.statistics = {
            'file_count': len(self.audio_data),
            'sample_rate': self.sr,
            'duration_stats': self.compute_duration_statistics(),
            'amplitude_stats': self.compute_amplitude_statistics(),
            'frequency_stats': self.compute_frequency_statistics(),
            'distribution_analysis': self.generate_distribution_analysis(),
            'outliers': {
                'duration': self.detect_outliers('duration'),
                'max_amplitude': self.detect_outliers('max_amplitude'),
                'rms': self.detect_outliers('rms'),
            },
        }

        return self.statistics

    @staticmethod
    def _to_builtin(obj):
        """Recursively replace NumPy arrays/scalars with plain Python values."""
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, np.generic):
            # Covers floating, integer and bool NumPy scalars alike.
            return obj.item()
        if isinstance(obj, dict):
            return {key: AudioStatistics._to_builtin(value) for key, value in obj.items()}
        if isinstance(obj, (list, tuple)):
            # Tuples matter: outliers are stored as (path, value) tuples whose
            # value may be a float32 scalar, which json cannot encode.
            return [AudioStatistics._to_builtin(item) for item in obj]
        return obj

    def export_statistics(self, output_path: str):
        """Write ``self.statistics`` to ``output_path`` as UTF-8 JSON.

        Args:
            output_path: Destination file path; overwritten if it exists.
        """
        stats_json = self._to_builtin(self.statistics)

        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(stats_json, f, indent=2, ensure_ascii=False)
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
def quick_audio_stats(file_paths: List[str], sr: int = 22050) -> Dict:
    """Load the given files and return the full statistics report.

    Args:
        file_paths: Paths of the audio files to analyse.
        sr: Sample rate handed to the analyzer.

    Returns:
        The dict produced by ``AudioStatistics.compute_all_statistics``.
    """
    stats_builder = AudioStatistics(sr=sr)
    # Files are registered one by one through the analyzer's own loader.
    for audio_path in file_paths:
        stats_builder.add_audio_file(audio_path)
    report = stats_builder.compute_all_statistics()
    return report
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
def compare_datasets(dataset1_paths: List[str], dataset2_paths: List[str],
                     sr: int = 22050) -> Dict:
    """Compute statistics for two datasets and their headline differences.

    Args:
        dataset1_paths: File paths of the first dataset.
        dataset2_paths: File paths of the second dataset.
        sr: Sample rate handed to both analyzers.

    Returns:
        A dict with the full reports under ``dataset1`` / ``dataset2`` and a
        ``differences`` section (dataset2 minus dataset1). A difference is
        ``None`` when either dataset yielded no statistics for that field,
        e.g. because it was empty or none of its files could be loaded.
    """
    def _report(paths):
        analyzer = AudioStatistics(sr=sr)
        for path in paths:
            analyzer.add_audio_file(path)
        return analyzer.compute_all_statistics()

    def _diff(first, second):
        # An empty dataset has ``{}`` stats sections, so the value is missing.
        if first is None or second is None:
            return None
        return second - first

    stats1 = _report(dataset1_paths)
    stats2 = _report(dataset2_paths)

    def _mean_duration(stats):
        return stats['duration_stats'].get('mean_duration')

    def _mean_rms(stats):
        return stats['amplitude_stats'].get('rms_values', {}).get('mean')

    comparison = {
        'dataset1': stats1,
        'dataset2': stats2,
        'differences': {
            'file_count_diff': stats2['file_count'] - stats1['file_count'],
            'mean_duration_diff': _diff(_mean_duration(stats1), _mean_duration(stats2)),
            'mean_rms_diff': _diff(_mean_rms(stats1), _mean_rms(stats2)),
        },
    }

    return comparison
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
'''
|
|
2
|
+
Author: 凌逆战 | Never
|
|
3
|
+
Date: 2025-08-05 01:36:09
|
|
4
|
+
Description:
|
|
5
|
+
时域特征分析模块
|
|
6
|
+
Temporal Features Analysis Module
|
|
7
|
+
|
|
8
|
+
提供音频时域特征提取和分析功能
|
|
9
|
+
'''
|
|
10
|
+
|
|
11
|
+
import warnings
|
|
12
|
+
from typing import Tuple, Optional, Union
|
|
13
|
+
from scipy import signal
|
|
14
|
+
import numpy as np
|
|
15
|
+
import librosa
|
|
16
|
+
# from neverlib.utils.utils import dB
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def dB(level):
    """Convert a linear amplitude to decibels.

    Args:
        level: Linear amplitude (scalar or NumPy array).

    Returns:
        ``20 * log10(level + 1e-10)``; the small offset keeps an amplitude of
        zero from producing ``-inf``.
    """
    floor = 1e-10
    shifted = level + floor
    return 20 * np.log10(shifted)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def peak_amplitude(wav):
    """Return the peak level of ``wav`` in dB.

    :param wav: sample array; the maximum is taken over all elements
    :return: dB value of the largest absolute sample
    """
    magnitudes = np.abs(wav)
    loudest = np.max(magnitudes)
    return dB(loudest)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def rms_amplitude(wav, frame_length=512, hop_length=256):
    """Return the per-frame RMS level of ``wav`` in dB.

    The input is flattened to one dimension before framing.
    NOTE(review): for a multi-channel array this mixes channels into one
    sample stream -- confirm that is intended.

    :param wav: sample array (NumPy)
    :param frame_length: number of samples per frame
    :param hop_length: number of samples between frame starts
    :return: (frame_num,) array with one dB value per frame
    """
    samples = wav.flatten()
    # Framed layout is (frame_length, frame_num), so reduce over axis 0.
    framed = librosa.util.frame(samples, frame_length=frame_length, hop_length=hop_length)
    mean_square = np.mean(np.square(framed), axis=0)
    per_frame_rms = np.sqrt(mean_square)
    return dB(per_frame_rms)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def mean_rms_amplitude(wav):
    """Return the mean of the per-frame RMS levels (dB) of ``wav``.

    :param wav: sample array, framed with the defaults of ``rms_amplitude``
    :return: average of the per-frame dB values
    """
    frame_levels = rms_amplitude(wav)
    return np.mean(frame_levels)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def min_rms_amplitude(wav):
    """Return the lowest per-frame RMS level (dB) of ``wav``.

    :param wav: sample array, framed with the defaults of ``rms_amplitude``
    :return: smallest of the per-frame dB values
    """
    frame_levels = rms_amplitude(wav)
    return np.min(frame_levels)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def max_rms_amplitude(wav):
    """Return the highest per-frame RMS level (dB) of ``wav``.

    :param wav: sample array, framed with the defaults of ``rms_amplitude``
    :return: largest of the per-frame dB values
    """
    frame_levels = rms_amplitude(wav)
    return np.max(frame_levels)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def zero_crossing_rate(self, audio: np.ndarray) -> np.ndarray:
    """Compute the frame-wise zero-crossing rate of ``audio``.

    NOTE(review): this is a module-level function that still takes ``self``
    and reads ``self.frame_length`` / ``self.hop_length``; it looks like a
    method lifted out of a class. Callers must pass an object carrying those
    two attributes as the first argument -- confirm the intended signature.

    Args:
        self: Object providing ``frame_length`` and ``hop_length``.
        audio: Audio signal.

    Returns:
        First row of the librosa zero-crossing-rate output.
    """
    win = self.frame_length
    hop = self.hop_length
    zcr = librosa.feature.zero_crossing_rate(audio, frame_length=win, hop_length=hop)
    return zcr[0]
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def short_time_energy(self, audio: np.ndarray) -> np.ndarray:
    """Compute the short-time energy (sum of squares) of each frame.

    NOTE(review): this is a module-level function that still takes ``self``
    and reads ``self.frame_length`` / ``self.hop_length``; it looks like a
    method lifted out of a class. Callers must pass an object carrying those
    two attributes as the first argument -- confirm the intended signature.

    Args:
        self: Object providing ``frame_length`` and ``hop_length``.
        audio: Audio signal.

    Returns:
        Array with one energy value per frame.
    """
    win = self.frame_length
    hop = self.hop_length
    # Slice the signal into overlapping frames, then reduce over axis 0.
    framed = librosa.util.frame(audio, frame_length=win, hop_length=hop)
    return np.square(framed).sum(axis=0)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def dc_offset(wav):
    """Return the DC component of ``wav``, i.e. the mean over all samples.

    :param wav: sample array
    :return: arithmetic mean of every element
    """
    offset = np.mean(wav)
    return offset
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
if __name__ == "__main__":
    # Smoke test: frame 16000 samples of Gaussian noise and show how many
    # per-frame RMS values come back. The other helpers in this module
    # (peak_amplitude, mean_rms_amplitude, dc_offset, ...) can be tried the
    # same way.
    noise = np.random.randn(16000)
    frame_levels = rms_amplitude(noise)
    print(frame_levels.shape)
|