neverlib 0.2.3__py3-none-any.whl → 0.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- neverlib/.history/Docs/audio_aug/test_snr_20250806011311.py +0 -0
- neverlib/.history/Docs/audio_aug/test_snr_20250806011331.py +75 -0
- neverlib/.history/Docs/audio_aug/test_snr_20250806011342.py +57 -0
- neverlib/.history/Docs/audio_aug/test_snr_20250806011352.py +57 -0
- neverlib/.history/Docs/audio_aug/test_snr_20250806011403.py +57 -0
- neverlib/.history/Docs/audio_aug/test_snr_20250806011413.py +57 -0
- neverlib/.history/Docs/audio_aug/test_snr_20250806011435.py +55 -0
- neverlib/.history/Docs/vad/1_20250810032405.py +0 -0
- neverlib/.history/Docs/vad/1_20250810032417.py +39 -0
- neverlib/.history/audio_aug/audio_aug_20250806010451.py +125 -0
- neverlib/.history/audio_aug/audio_aug_20250806010750.py +138 -0
- neverlib/.history/audio_aug/audio_aug_20250806010759.py +140 -0
- neverlib/.history/audio_aug/audio_aug_20250806010803.py +140 -0
- neverlib/.history/audio_aug/audio_aug_20250806010809.py +140 -0
- neverlib/.history/audio_aug/audio_aug_20250806011108.py +140 -0
- neverlib/.history/dataAnalyze/__init___20250806204125.py +14 -0
- neverlib/.history/dataAnalyze/__init___20250806204139.py +14 -0
- neverlib/.history/dataAnalyze/__init___20250806204159.py +14 -0
- neverlib/.history/filter/__init___20250820103351.py +70 -0
- neverlib/.history/filter/__init___20250821102348.py +70 -0
- neverlib/.history/filter/__init___20250821102405.py +14 -0
- neverlib/.history/filter/auto_eq/__init___20250819213121.py +36 -0
- neverlib/.history/filter/auto_eq/__init___20250821102241.py +36 -0
- neverlib/.history/filter/auto_eq/__init___20250821102259.py +36 -0
- neverlib/.history/filter/auto_eq/__init___20250821102307.py +36 -0
- neverlib/.history/filter/auto_eq/__init___20250821102310.py +36 -0
- neverlib/.history/filter/auto_eq/__init___20250821102318.py +36 -0
- neverlib/.history/filter/auto_eq/__init___20250821102507.py +36 -0
- neverlib/{filter/AudoEQ/auto_eq_de.py → .history/filter/auto_eq/de_eq_20250820103848.py} +1 -1
- neverlib/.history/filter/auto_eq/de_eq_20250821102422.py +360 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820140732.py +75 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820140745.py +75 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820140816.py +75 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820140938.py +77 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820141003.py +77 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820141006.py +77 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820141019.py +77 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820141049.py +77 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820141211.py +77 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820141227.py +77 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820141311.py +78 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820141340.py +78 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820141712.py +78 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820141733.py +78 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820141755.py +78 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250821102434.py +76 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250821102500.py +76 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250821102502.py +76 -0
- neverlib/{filter/AudoEQ/auto_eq_ga_basic.py → .history/filter/auto_eq/ga_eq_basic_20250820102957.py} +1 -1
- neverlib/.history/filter/auto_eq/ga_eq_basic_20250820113054.py +380 -0
- neverlib/.history/filter/auto_eq/ga_eq_basic_20250820113150.py +380 -0
- neverlib/.history/filter/auto_eq/ga_eq_basic_20250820113520.py +385 -0
- neverlib/.history/filter/auto_eq/ga_eq_basic_20250820113525.py +385 -0
- neverlib/.history/filter/auto_eq/ga_eq_basic_20250821102212.py +385 -0
- neverlib/.history/metrics/dnsmos_20250806001612.py +160 -0
- neverlib/.history/metrics/dnsmos_20250815180659.py +160 -0
- neverlib/.history/metrics/dnsmos_20250815180701.py +158 -0
- neverlib/.history/metrics/dnsmos_20250815181321.py +154 -0
- neverlib/.history/metrics/dnsmos_20250815181327.py +154 -0
- neverlib/.history/metrics/dnsmos_20250815181331.py +154 -0
- neverlib/.history/metrics/dnsmos_20250815181620.py +154 -0
- neverlib/.history/metrics/dnsmos_20250815181631.py +154 -0
- neverlib/.history/metrics/dnsmos_20250815181742.py +154 -0
- neverlib/.history/metrics/dnsmos_20250815181824.py +153 -0
- neverlib/.history/metrics/dnsmos_20250815181834.py +153 -0
- neverlib/.history/metrics/dnsmos_20250815181922.py +153 -0
- neverlib/.history/metrics/dnsmos_20250815182011.py +147 -0
- neverlib/.history/metrics/dnsmos_20250815182036.py +144 -0
- neverlib/.history/metrics/dnsmos_20250815182936.py +143 -0
- neverlib/.history/metrics/dnsmos_20250815182942.py +143 -0
- neverlib/.history/metrics/dnsmos_20250815183032.py +137 -0
- neverlib/.history/metrics/dnsmos_20250815183101.py +144 -0
- neverlib/.history/metrics/dnsmos_20250815183121.py +144 -0
- neverlib/.history/metrics/dnsmos_20250815183123.py +143 -0
- neverlib/.history/metrics/dnsmos_20250815183214.py +143 -0
- neverlib/.history/metrics/dnsmos_20250815183240.py +143 -0
- neverlib/.history/metrics/dnsmos_20250815183248.py +144 -0
- neverlib/.history/metrics/dnsmos_20250815183407.py +142 -0
- neverlib/.history/metrics/dnsmos_20250815183409.py +142 -0
- neverlib/.history/metrics/dnsmos_20250815183431.py +142 -0
- neverlib/.history/metrics/dnsmos_20250815183507.py +140 -0
- neverlib/.history/metrics/dnsmos_20250815183513.py +139 -0
- neverlib/.history/metrics/dnsmos_20250815183618.py +139 -0
- neverlib/.history/metrics/dnsmos_20250815183709.py +140 -0
- neverlib/.history/metrics/dnsmos_20250815183756.py +137 -0
- neverlib/.history/metrics/dnsmos_20250815183815.py +128 -0
- neverlib/.history/metrics/dnsmos_20250815183827.py +129 -0
- neverlib/.history/metrics/dnsmos_20250815183913.py +117 -0
- neverlib/.history/metrics/dnsmos_20250815183914.py +117 -0
- neverlib/.history/metrics/dnsmos_20250815184003.py +118 -0
- neverlib/.history/metrics/dnsmos_20250815184040.py +118 -0
- neverlib/.history/metrics/dnsmos_20250815184049.py +118 -0
- neverlib/.history/metrics/dnsmos_20250815184104.py +117 -0
- neverlib/.history/metrics/dnsmos_20250815184200.py +117 -0
- neverlib/.history/metrics/lpc_lsp_metric_20250816015944.py +128 -0
- neverlib/.history/metrics/lpc_lsp_metric_20250816020142.py +128 -0
- neverlib/.history/metrics/lpc_lsp_metric_20250816020156.py +128 -0
- neverlib/.history/metrics/lpc_lsp_metric_20250816020554.py +130 -0
- neverlib/.history/metrics/lpc_lsp_metric_20250816020600.py +125 -0
- neverlib/.history/metrics/lpc_lsp_metric_20250816020631.py +120 -0
- neverlib/.history/metrics/lpc_lsp_metric_20250816020746.py +118 -0
- neverlib/.history/metrics/lpc_me_20250816013111.py +0 -0
- neverlib/.history/metrics/lpc_me_20250816013129.py +121 -0
- neverlib/.history/metrics/lpc_me_20250816015430.py +103 -0
- neverlib/.history/metrics/lpc_me_20250816015535.py +96 -0
- neverlib/.history/metrics/lpc_me_20250816015542.py +96 -0
- neverlib/.history/metrics/lpc_me_20250816015636.py +97 -0
- neverlib/.history/metrics/lpc_me_20250816015658.py +104 -0
- neverlib/.history/metrics/lpc_me_20250816015703.py +100 -0
- neverlib/.history/metrics/lpc_me_20250816015945.py +128 -0
- neverlib/.history/metrics/snr_20250806010538.py +177 -0
- neverlib/.history/metrics/snr_20250806211634.py +184 -0
- neverlib/.history/metrics/spec_20250805234209.py +45 -0
- neverlib/.history/metrics/spec_20250816135530.py +11 -0
- neverlib/.history/metrics/spec_20250816135654.py +16 -0
- neverlib/.history/metrics/spec_20250816135736.py +68 -0
- neverlib/.history/metrics/spec_20250816135904.py +75 -0
- neverlib/.history/metrics/spec_20250816135921.py +82 -0
- neverlib/.history/metrics/spec_20250816140111.py +82 -0
- neverlib/.history/metrics/spec_20250816140543.py +136 -0
- neverlib/.history/metrics/spec_20250816140559.py +172 -0
- neverlib/.history/metrics/spec_20250816140602.py +172 -0
- neverlib/.history/metrics/spec_20250816140608.py +172 -0
- neverlib/.history/metrics/spec_20250816140654.py +148 -0
- neverlib/.history/metrics/spec_20250816140705.py +144 -0
- neverlib/.history/metrics/spec_20250816140755.py +138 -0
- neverlib/.history/metrics/spec_20250816140823.py +170 -0
- neverlib/.history/metrics/spec_20250816140832.py +170 -0
- neverlib/.history/metrics/spec_20250816140833.py +170 -0
- neverlib/.history/metrics/spec_20250816140922.py +147 -0
- neverlib/.history/metrics/spec_20250816141148.py +107 -0
- neverlib/.history/metrics/spec_20250816141219.py +123 -0
- neverlib/.history/metrics/spec_20250816141732.py +178 -0
- neverlib/.history/metrics/spec_20250816141740.py +178 -0
- neverlib/.history/metrics/spec_20250816142030.py +178 -0
- neverlib/.history/metrics/spec_20250816142107.py +135 -0
- neverlib/.history/metrics/spec_20250816142126.py +135 -0
- neverlib/.history/metrics/spec_20250816142410.py +135 -0
- neverlib/.history/metrics/spec_20250816142415.py +136 -0
- neverlib/.history/metrics/spec_metric_20250816135156.py +0 -0
- neverlib/.history/metrics/spec_metric_20250816135226.py +5 -0
- neverlib/.history/metrics/spec_metric_20250816135227.py +10 -0
- neverlib/.history/metrics/spec_metric_20250816135306.py +15 -0
- neverlib/.history/metrics/spec_metric_20250816135442.py +31 -0
- neverlib/.history/metrics/spec_metric_20250816135448.py +31 -0
- neverlib/.history/metrics/spec_metric_20250816135520.py +29 -0
- neverlib/.history/metrics/spec_metric_20250816135537.py +63 -0
- neverlib/.history/metrics/spec_metric_20250816135653.py +65 -0
- neverlib/.history/vad/PreProcess_20250805234211.py +63 -0
- neverlib/.history/vad/PreProcess_20250809232455.py +63 -0
- neverlib/.history/vad/PreProcess_20250816020725.py +66 -0
- neverlib/.history/vad/VAD_Silero_20250805234211.py +50 -0
- neverlib/.history/vad/VAD_Silero_20250809232456.py +50 -0
- neverlib/.history/vad/VAD_WebRTC_20250805234211.py +61 -0
- neverlib/.history/vad/VAD_WebRTC_20250809232456.py +61 -0
- neverlib/.history/vad/VAD_funasr_20250805234211.py +54 -0
- neverlib/.history/vad/VAD_funasr_20250809232456.py +54 -0
- neverlib/.history/vad/VAD_vadlib_20250805234211.py +70 -0
- neverlib/.history/vad/VAD_vadlib_20250809232455.py +70 -0
- neverlib/.history/vad/VAD_whisper_20250805234211.py +55 -0
- neverlib/.history/vad/VAD_whisper_20250809232456.py +55 -0
- neverlib/.specstory/.what-is-this.md +69 -0
- neverlib/.specstory/history/2025-08-05_17-06Z-/350/277/231/344/270/200/346/255/245/347/232/204/347/233/256/347/232/204/346/230/257/344/273/200/344/271/210.md +424 -0
- neverlib/Docs/audio_aug/test_snr.py +55 -0
- neverlib/audio_aug/HarmonicDistortion.py +79 -0
- neverlib/audio_aug/TFDrop.py +41 -0
- neverlib/audio_aug/TFMask.py +56 -0
- neverlib/audio_aug/audio_aug.py +16 -1
- neverlib/audio_aug/clip_aug.py +41 -0
- neverlib/audio_aug/coder_aug.py +209 -0
- neverlib/audio_aug/coder_aug2.py +118 -0
- neverlib/audio_aug/loss_packet_aug.py +103 -0
- neverlib/audio_aug/quant_aug.py +78 -0
- neverlib/data_analyze/__init__.py +14 -0
- neverlib/filter/auto_eq/__init__.py +36 -0
- neverlib/filter/auto_eq/de_eq.py +360 -0
- neverlib/filter/auto_eq/freq_eq.py +76 -0
- neverlib/filter/{AudoEQ/auto_eq_ga_advanced.py → auto_eq/ga_eq_advanced.py} +1 -1
- neverlib/filter/auto_eq/ga_eq_basic.py +385 -0
- neverlib/metrics/dnsmos.py +58 -101
- neverlib/metrics/lpc_lsp.py +118 -0
- neverlib/metrics/snr.py +11 -4
- neverlib/metrics/spec.py +136 -45
- neverlib/utils/utils.py +17 -14
- neverlib/vad/PreProcess.py +5 -2
- neverlib/vad/VAD_Silero.py +1 -1
- neverlib/vad/VAD_WebRTC.py +1 -1
- neverlib/vad/VAD_funasr.py +1 -1
- neverlib/vad/VAD_vadlib.py +1 -1
- neverlib/vad/VAD_whisper.py +1 -1
- {neverlib-0.2.3.dist-info → neverlib-0.2.4.dist-info}/METADATA +1 -1
- neverlib-0.2.4.dist-info/RECORD +229 -0
- neverlib-0.2.3.dist-info/RECORD +0 -53
- /neverlib/{dataAnalyze/__init__.py → .history/dataAnalyze/__init___20250805234204.py} +0 -0
- /neverlib/{filter/AudoEQ/auto_eq_spectral_direct.py → .history/filter/auto_eq/freq_eq_20250805234206.py} +0 -0
- /neverlib/{dataAnalyze → data_analyze}/README.md +0 -0
- /neverlib/{dataAnalyze → data_analyze}/dataset_analyzer.py +0 -0
- /neverlib/{dataAnalyze → data_analyze}/quality_metrics.py +0 -0
- /neverlib/{dataAnalyze → data_analyze}/rms_distrubution.py +0 -0
- /neverlib/{dataAnalyze → data_analyze}/spectral_analysis.py +0 -0
- /neverlib/{dataAnalyze → data_analyze}/statistics.py +0 -0
- /neverlib/{dataAnalyze → data_analyze}/temporal_features.py +0 -0
- /neverlib/{dataAnalyze → data_analyze}/visualization.py +0 -0
- /neverlib/filter/{AudoEQ → auto_eq}/README.md +0 -0
- {neverlib-0.2.3.dist-info → neverlib-0.2.4.dist-info}/WHEEL +0 -0
- {neverlib-0.2.3.dist-info → neverlib-0.2.4.dist-info}/licenses/LICENSE +0 -0
- {neverlib-0.2.3.dist-info → neverlib-0.2.4.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
'''
|
|
2
|
+
功能描述
|
|
3
|
+
|
|
4
|
+
计算参考语音和测试语音之间的线性预测编码-线谱对(LPC-LSP)参
|
|
5
|
+
数失真度
|
|
6
|
+
|
|
7
|
+
主要组件
|
|
8
|
+
|
|
9
|
+
预处理函数:
|
|
10
|
+
- pre_emphasis(): 预加重滤波,增强高频成分
|
|
11
|
+
- framing(): 分帧处理并应用汉明窗
|
|
12
|
+
|
|
13
|
+
LPC分析:
|
|
14
|
+
- lpc_analysis(): 使用librosa.lpc进行线性预测分析
|
|
15
|
+
- lpc_to_lsp(): LPC系数转换为线谱对参数
|
|
16
|
+
|
|
17
|
+
距离计算:
|
|
18
|
+
- lsp_mse(): 计算LSP向量间的均方误差
|
|
19
|
+
- lpc_lsp_distance(): 主函数,返回平均失真度和逐帧失真列表
|
|
20
|
+
|
|
21
|
+
技术特点
|
|
22
|
+
|
|
23
|
+
- 使用soundfile读取音频(支持多种格式)
|
|
24
|
+
- librosa进行LPC分析(替代了自定义算法)
|
|
25
|
+
- 基于LSP的频域失真测量,对量化误差敏感度更低
|
|
26
|
+
- 逐帧分析捕捉语音时变特性
|
|
27
|
+
|
|
28
|
+
应用场景
|
|
29
|
+
|
|
30
|
+
语音编码器质量评估、语音增强效果测量、语音合成质量分析
|
|
31
|
+
'''
|
|
32
|
+
import numpy as np
|
|
33
|
+
import librosa
|
|
34
|
+
import soundfile as sf
|
|
35
|
+
from neverlib.vad.PreProcess import pre_emphasis
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def framing(signal, frame_size, frame_stride, fs):
    """Split a signal into overlapping frames and apply a Hamming window.

    Args:
        signal: Audio samples; framing is done along axis 0.
        frame_size: Frame duration in seconds.
        frame_stride: Hop between consecutive frame starts, in seconds.
        fs: Sampling rate in Hz.

    Returns:
        The windowed frames. For a 1-D signal each row is one frame, i.e.
        the shape is (num_frames, frame_length).
    """
    samples_per_frame = int(round(frame_size * fs))
    samples_per_hop = int(round(frame_stride * fs))

    # Framing along axis 0 puts the frame index first and the samples of
    # each frame last.
    raw_frames = librosa.util.frame(
        signal,
        frame_length=samples_per_frame,
        hop_length=samples_per_hop,
        axis=0,
    )

    # The window has frame_length entries and broadcasts over every frame.
    return raw_frames * np.hamming(samples_per_frame)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def lpc_to_lsp(a, num_points=512):
    """Convert LPC coefficients to line spectral pair (LSP) frequencies.

    The predictor polynomial ``A(z) = a[0] + a[1] z^-1 + ... + a[p] z^-p`` is
    split into a palindromic and an anti-palindromic polynomial::

        P(z) = A(z) + z^-(p+1) * A(1/z)
        Q(z) = A(z) - z^-(p+1) * A(1/z)

    The LSP frequencies are the angles, inside (0, pi), of the roots of P and
    Q. The previous implementation mis-indexed the sum/difference
    coefficients and evaluated the polynomials at ``cos(w)`` instead of on
    the unit circle, so its output was not the LSP set (for ``A(z) = 1`` it
    returned an empty array instead of ``[pi/3, 2*pi/3]``).

    Args:
        a: LPC coefficients ``[a0, a1, ..., ap]``; ``a0`` is normally 1.
        num_points: Ignored. Kept only so that existing callers passing it
            keep working; the roots are now computed exactly instead of being
            searched on a frequency grid.

    Returns:
        np.ndarray: LSP frequencies in radians, sorted in ascending order.
        For a minimum-phase ``A(z)`` there are exactly ``p`` values. Fewer
        than two coefficients yield an empty array.
    """
    a = np.asarray(a, dtype=np.float64).ravel()
    p = a.size - 1
    if p < 1:
        return np.empty(0)

    # Coefficients of A(z) padded to degree p+1; reversing the padded vector
    # gives the coefficients of z^-(p+1) * A(1/z).
    a_ext = np.append(a, 0.0)
    P = a_ext + a_ext[::-1]
    Q = a_ext - a_ext[::-1]

    # Divide out the trivial roots at z = -1 and z = +1 so that only the
    # conjugate pairs carrying the LSP frequencies remain.
    if p % 2 == 0:
        P, _ = np.polydiv(P, [1.0, 1.0])
        Q, _ = np.polydiv(Q, [1.0, -1.0])
    else:
        Q, _ = np.polydiv(Q, [1.0, 0.0, -1.0])

    angles = np.angle(np.concatenate([np.roots(P), np.roots(Q)]))
    # Each conjugate pair contributes one frequency: keep the upper half only.
    return np.sort(angles[(angles > 0) & (angles < np.pi)])
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def lpc_lsp_distance(ref_wav, test_wav, frame_size=0.025, frame_stride=0.01, order=12):
    """Compute the frame-wise LPC-LSP distortion between two audio files.

    Both files are pre-emphasised, split into Hamming-windowed frames,
    analysed with ``librosa.lpc`` and converted to LSP vectors; the
    distortion of a frame is the mean squared error between its two LSP
    vectors.

    Args:
        ref_wav: Reference audio file, as accepted by ``soundfile.read``.
        test_wav: Test audio file, as accepted by ``soundfile.read``.
        frame_size: Frame duration in seconds.
        frame_stride: Hop between consecutive frames in seconds.
        order: LPC order.

    Returns:
        tuple: ``(mean_distortion, distances)`` where ``distances`` holds one
        value per compared frame. A frame for which no LSP value can be
        compared is reported as NaN and excluded from the mean; the mean is
        NaN when no frame is usable.
    """
    ref_sig, fs_r = sf.read(ref_wav, dtype='float32')
    test_sig, fs_t = sf.read(test_wav, dtype='float32')

    # Multi-channel files are returned as 2-D (frames, channels) arrays.
    # Downmix to mono so that pre-emphasis and framing see one time axis
    # instead of silently flattening interleaved channels.
    if ref_sig.ndim > 1:
        ref_sig = ref_sig.mean(axis=1)
    if test_sig.ndim > 1:
        test_sig = test_sig.mean(axis=1)

    # Pre-emphasis
    ref_sig = pre_emphasis(ref_sig)
    test_sig = pre_emphasis(test_sig)

    # Framing. Each file uses its own sampling rate, so frames stay aligned
    # in time.
    # NOTE(review): LSP values are normalised frequencies, so the comparison
    # is presumably only meaningful when fs_r == fs_t - confirm with callers.
    ref_frames = framing(ref_sig, frame_size, frame_stride, fs_r)
    test_frames = framing(test_sig, frame_size, frame_stride, fs_t)

    distances = []
    # zip() stops at the shorter file, which aligns the frame counts.
    for ref_frame, test_frame in zip(ref_frames, test_frames):
        lsp_ref = lpc_to_lsp(librosa.lpc(ref_frame, order=order))
        lsp_test = lpc_to_lsp(librosa.lpc(test_frame, order=order))

        # Compare only the LSP values both frames have in common.
        min_len = min(len(lsp_ref), len(lsp_test))
        if min_len == 0:
            # Nothing to compare: keep the slot so indices still map to
            # frames, but do not let it poison the average.
            distances.append(np.nan)
            continue
        distances.append(np.mean((lsp_ref[:min_len] - lsp_test[:min_len]) ** 2))

    finite = [d for d in distances if not np.isnan(d)]
    mean_distance = np.mean(finite) if finite else np.float64(np.nan)
    return mean_distance, distances
|
|
112
|
+
|
|
113
|
+
if __name__ == "__main__":
    # Demo: compare the bundled example recording against itself.
    ref_file = "../data/vad_example.wav"  # reference speech file
    test_file = "../data/vad_example.wav"  # speech file under test

    avg_dist, dist_list = lpc_lsp_distance(ref_wav=ref_file, test_wav=test_file)
    print(f"平均 LSP MSE 失真: {avg_dist}")
|
neverlib/metrics/snr.py
CHANGED
|
@@ -2,7 +2,7 @@ import sys
|
|
|
2
2
|
sys.path.append("../")
|
|
3
3
|
import librosa
|
|
4
4
|
import numpy as np
|
|
5
|
-
from vad.utils import vad2nad
|
|
5
|
+
from neverlib.vad.utils import vad2nad
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
def get_snr(speech, noise):
|
|
@@ -47,7 +47,14 @@ def get_snr_from_noisy(noisy, speech_vad=None):
|
|
|
47
47
|
noise_segments.append(noisy[noise_point['start']:noise_point['end']])
|
|
48
48
|
noise = np.concatenate(noise_segments, axis=0)
|
|
49
49
|
|
|
50
|
-
|
|
50
|
+
P_speech_noise = np.mean(speech ** 2) # 语音+噪声功率
|
|
51
|
+
P_noise = max(np.mean(noise ** 2), EPS) # 纯噪声功率
|
|
52
|
+
|
|
53
|
+
# 计算净语音功率
|
|
54
|
+
P_speech = max(P_speech_noise - P_noise, EPS)
|
|
55
|
+
snr = 10 * np.log10(P_speech / P_noise)
|
|
56
|
+
|
|
57
|
+
return snr
|
|
51
58
|
|
|
52
59
|
|
|
53
60
|
def seg_snr(clean, noisy, frame_length: int, hop_length: int):
|
|
@@ -173,5 +180,5 @@ if __name__ == "__main__":
|
|
|
173
180
|
|
|
174
181
|
# 测试各种信噪比计算方法
|
|
175
182
|
print(f"SNR: {get_snr(speech, noise):.2f} dB")
|
|
176
|
-
print(f"Segmental SNR: {
|
|
177
|
-
print(f"PSNR: {
|
|
183
|
+
print(f"Segmental SNR: {seg_snr(speech, noisy, 100, 50):.2f} dB")
|
|
184
|
+
print(f"PSNR: {psnr(speech, noisy):.2f} dB")
|
neverlib/metrics/spec.py
CHANGED
|
@@ -1,45 +1,136 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
1
|
+
'''
|
|
2
|
+
Author: 凌逆战 | Never
|
|
3
|
+
Date: 2025-08-16 13:51:57
|
|
4
|
+
Description: 音频信号频域客观度量指标计算工具
|
|
5
|
+
主要功能:
|
|
6
|
+
1. SD (Spectral Distance) - 频谱距离
|
|
7
|
+
- 计算两个音频信号在频域上的差异程度
|
|
8
|
+
- 适用于音频质量评估和信号相似性分析
|
|
9
|
+
|
|
10
|
+
2. LSD (Log-Spectral Distance) - 对数谱距离
|
|
11
|
+
- 在对数功率谱域计算信号距离
|
|
12
|
+
- 更符合人耳听觉特性,常用于语音质量评估
|
|
13
|
+
|
|
14
|
+
3. MCD (Mel-Cepstral Distance) - 梅尔倒谱距离
|
|
15
|
+
- 基于MFCC特征的音频相似性度量
|
|
16
|
+
- 广泛应用于语音合成、语音识别等任务
|
|
17
|
+
'''
|
|
18
|
+
|
|
19
|
+
import librosa
|
|
20
|
+
import numpy as np
|
|
21
|
+
import soundfile as sf
|
|
22
|
+
from neverlib.utils import EPS
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def sd(ref_wav, test_wav, n_fft=2048, hop_length=512, win_length=None):
    """Compute the spectral distance (SD) between two audio signals.

    The distance is the root mean square of the magnitude of the difference
    between the two complex STFTs, taken over all time-frequency bins.

    Args:
        ref_wav (np.ndarray): Reference signal (1-D array).
        test_wav (np.ndarray): Signal under test (1-D array).
        n_fft (int): FFT size.
        hop_length (int): Hop size between STFT frames.
        win_length (int, optional): Window length, forwarded unchanged to
            ``librosa.stft``.

    Returns:
        float: Spectral distance; smaller means the signals are more alike.
    """
    assert len(ref_wav) == len(test_wav), "输入信号长度必须相同"

    # Identical STFT settings for both signals so the bins line up.
    stft_kwargs = dict(n_fft=n_fft, hop_length=hop_length, win_length=win_length)
    ref_spec = librosa.stft(ref_wav, **stft_kwargs)
    test_spec = librosa.stft(test_wav, **stft_kwargs)

    # RMS of the complex difference over every bin.
    return np.sqrt(np.mean(np.abs(ref_spec - test_spec) ** 2))
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def lsd(ref_wav, test_wav, n_fft=2048, hop_length=512, win_length=None):
    """Compute the log-spectral distance (LSD) between two 1-D signals.

    Both signals are converted to log power spectra (``10 * log10``) and the
    distance is the root mean square of their difference over all
    time-frequency bins. The module-level ``EPS`` constant is added to the
    power spectra before the logarithm so that empty bins do not produce
    ``log(0)``.

    Args:
        ref_wav (np.ndarray): Clean reference signal (1-D array).
        test_wav (np.ndarray): Estimated / processed signal (1-D array).
        n_fft (int): FFT size.
        hop_length (int): Hop size between STFT frames.
        win_length (int, optional): Window length; ``n_fft`` when ``None``.

    Returns:
        float: Log-spectral distance in dB.
    """
    assert ref_wav.ndim == 1 and test_wav.ndim == 1, "输入信号必须是一维数组。"
    # Same precondition as sd() and mcd(). Without it, a signal short enough
    # to yield a single STFT frame would silently broadcast against a longer
    # one instead of failing.
    assert len(ref_wav) == len(test_wav), "输入信号长度必须相同"

    if win_length is None:
        win_length = n_fft

    ref_stft = librosa.stft(ref_wav, n_fft=n_fft, hop_length=hop_length, win_length=win_length)  # (F,T)
    test_stft = librosa.stft(test_wav, n_fft=n_fft, hop_length=hop_length, win_length=win_length)  # (F,T)

    ref_log_power_spec = 10 * np.log10(np.abs(ref_stft) ** 2 + EPS)  # (F,T)
    test_log_power_spec = 10 * np.log10(np.abs(test_stft) ** 2 + EPS)  # (F,T)

    squared_error = (ref_log_power_spec - test_log_power_spec) ** 2
    return np.sqrt(np.mean(squared_error))
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def mcd(ref_wav, test_wav, sr=16000, n_mfcc=13):
    """Compute a mel-cepstral distance (MCD) between two audio signals.

    MFCCs are extracted from both signals with ``librosa.feature.mfcc``; the
    0th coefficient is dropped and the remaining coefficients are compared.

    Args:
        ref_wav (np.ndarray): Reference signal (1-D array).
        test_wav (np.ndarray): Signal under test (1-D array).
        sr (int): Sampling rate in Hz.
        n_mfcc (int): Number of MFCC coefficients to extract.

    Returns:
        float: Mel-cepstral distance; smaller means the signals are more
        alike.
    """
    assert len(ref_wav) == len(test_wav), "输入信号长度必须相同"

    ref_mfcc, test_mfcc = (
        librosa.feature.mfcc(y=wav, sr=sr, n_mfcc=n_mfcc)
        for wav in (ref_wav, test_wav)
    )

    # Skip coefficient 0, which mostly encodes energy.
    delta = ref_mfcc[1:] - test_mfcc[1:]
    squared_dist_per_frame = np.sum(delta ** 2, axis=0)

    # NOTE(review): the square root is taken after averaging over frames;
    # MCD is often defined as the frame average of per-frame square roots -
    # confirm which definition is intended.
    scale = 10.0 / np.log(10)
    return scale * np.sqrt(2 * np.mean(squared_dist_per_frame))
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
if __name__ == "__main__":
    # Demo: score the bundled example recording against itself.
    ref_file = "../data/vad_example.wav"  # reference speech file
    test_file = "../data/vad_example.wav"  # speech file under test

    ref_wav, ref_sr = sf.read(ref_file)
    test_wav, test_sr = sf.read(test_file)
    assert ref_sr == test_sr == 16000, "采样率必须为16000Hz"
    assert len(ref_wav) == len(test_wav), "音频长度必须相同"

    # Mel-cepstral distance
    mcd_value = mcd(ref_wav, test_wav)
    print(f"梅尔倒谱距离: {mcd_value:.2f}")

    # Log-spectral distance
    lsd_value = lsd(ref_wav, test_wav)
    print(f"对数谱距离: {lsd_value:.2f}")

    # Spectral distance
    sd_value = sd(ref_wav, test_wav)
    print(f"频谱距离: {sd_value:.2f}")
|
|
136
|
+
|
neverlib/utils/utils.py
CHANGED
|
@@ -21,7 +21,8 @@ def get_path_list(source_path, end="*.wav", shuffle=False):
|
|
|
21
21
|
# 实现列表特殊字符的过滤或筛选,返回符合匹配“.wav”字符列表
|
|
22
22
|
for filename in fnmatch.filter(filenames, end):
|
|
23
23
|
wav_list.append(os.path.join(root, filename))
|
|
24
|
-
|
|
24
|
+
if os.environ.get("LOCAL_RANK", "0") == "0":
|
|
25
|
+
print(source_path, len(wav_list))
|
|
25
26
|
if shuffle:
|
|
26
27
|
random.shuffle(wav_list)
|
|
27
28
|
return wav_list
|
|
@@ -143,19 +144,21 @@ def get_leaf_folders(directory):
|
|
|
143
144
|
|
|
144
145
|
|
|
145
146
|
def del_empty_folders(path):
    """Recursively delete empty folders below (and including) ``path``.

    Sub-folders are processed first, so a folder that only contained empty
    folders is removed as well. Symbolic links are never followed and never
    removed: following them would walk outside the tree, and ``os.rmdir`` on
    a link raises ``NotADirectoryError``. A link inside a folder counts as
    content, so that folder is kept.

    Args:
        path: Folder to clean up. Anything that is not an existing directory
            is ignored.

    Returns:
        None. Each removed folder is reported on stdout.
    """
    if not os.path.isdir(path):
        return

    # Depth-first: clean the real sub-folders before looking at this one.
    for name in os.listdir(path):
        child = os.path.join(path, name)
        if os.path.isdir(child) and not os.path.islink(child):
            del_empty_folders(child)

    # Only the top-level argument can be a link here; its contents were
    # cleaned above, but the link itself cannot be removed with rmdir.
    if os.path.islink(path):
        return

    # Re-list: the recursion above may have just emptied this folder.
    if not os.listdir(path):
        os.rmdir(path)
        print(f"删除空文件夹: {path}")
|
|
159
162
|
|
|
160
163
|
|
|
161
164
|
def DatasetSubfloderSplit(source_dir, split_dirs, percentage=None):
|
neverlib/vad/PreProcess.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
'''
|
|
2
2
|
Author: 凌逆战 | Never
|
|
3
3
|
Date: 2025-02-13 20:06:07
|
|
4
|
-
LastEditTime: 2025-
|
|
4
|
+
LastEditTime: 2025-08-16 02:07:24
|
|
5
5
|
FilePath: \neverlib\vad\PreProcess.py
|
|
6
6
|
Description:
|
|
7
7
|
'''
|
|
@@ -16,6 +16,9 @@ import noisereduce as nr
|
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
def pre_emphasis(audio_data, alpha=0.97):
    """Apply a first-order pre-emphasis filter: y[n] = x[n] - alpha * x[n-1].

    Args:
        audio_data: Audio samples (array-like). The filter runs along axis 0.
        alpha: Pre-emphasis coefficient.

    Returns:
        np.ndarray: Filtered samples with the same shape as the input. The
        first sample is passed through unchanged; empty input yields an
        empty array.
    """
    samples = np.asarray(audio_data)
    # Slicing (instead of indexing element 0) keeps empty input valid and
    # preserves the shape of multi-dimensional input rather than flattening.
    return np.concatenate((samples[:1], samples[1:] - alpha * samples[:-1]))
|
|
@@ -45,7 +48,7 @@ def NS(wav, sr=16000, stationary=True, prop_decrease=1.):
|
|
|
45
48
|
def NS_test():
|
|
46
49
|
import soundfile as sf
|
|
47
50
|
sr = 16000
|
|
48
|
-
wav_path = "../../
|
|
51
|
+
wav_path = "../../data/vad_example.wav"
|
|
49
52
|
wav, wav_sr = sf.read(wav_path, always_2d=False, dtype="float32")
|
|
50
53
|
wav_NS = NS(wav, sr=sr, stationary=True, prop_decrease=0.6)
|
|
51
54
|
sf.write("../../wav_data/000_short_NS.wav", wav_NS, samplerate=sr)
|
neverlib/vad/VAD_Silero.py
CHANGED
|
@@ -39,7 +39,7 @@ if __name__ == "__main__":
|
|
|
39
39
|
from neverlib.vad.PreProcess import HPFilter, volume_norm
|
|
40
40
|
|
|
41
41
|
sr = 16000
|
|
42
|
-
wav_path = "../../
|
|
42
|
+
wav_path = "../../data/vad_example.wav"
|
|
43
43
|
wav, wav_sr = sf.read(wav_path, always_2d=False, dtype="float32")
|
|
44
44
|
assert wav_sr == sr, f"音频采样率为{wav_sr}, 期望{sr}"
|
|
45
45
|
wav = HPFilter(wav, sr=sr, order=6, cutoff=100)
|
neverlib/vad/VAD_WebRTC.py
CHANGED
|
@@ -38,7 +38,7 @@ if __name__ == "__main__":
|
|
|
38
38
|
from neverlib.vad.PreProcess import HPFilter, volume_norm
|
|
39
39
|
|
|
40
40
|
sr = 16000
|
|
41
|
-
wav_path = "../../
|
|
41
|
+
wav_path = "../../data/vad_example.wav"
|
|
42
42
|
wav, wav_sr = sf.read(wav_path, always_2d=False, dtype="float32")
|
|
43
43
|
assert wav_sr == sr, f"音频采样率为{wav_sr}, 期望{sr}"
|
|
44
44
|
wav = HPFilter(wav, sr=sr, order=6, cutoff=100)
|
neverlib/vad/VAD_funasr.py
CHANGED
|
@@ -31,7 +31,7 @@ if __name__ == "__main__":
|
|
|
31
31
|
from neverlib.vad.PreProcess import HPFilter, volume_norm
|
|
32
32
|
|
|
33
33
|
sr = 16000
|
|
34
|
-
wav_path = "../../
|
|
34
|
+
wav_path = "../../data/vad_example.wav"
|
|
35
35
|
wav, wav_sr = sf.read(wav_path, always_2d=False, dtype="float32")
|
|
36
36
|
assert wav_sr == sr, f"音频采样率为{wav_sr}, 期望{sr}"
|
|
37
37
|
wav = HPFilter(wav, sr=sr, order=6, cutoff=100)
|
neverlib/vad/VAD_vadlib.py
CHANGED
|
@@ -47,7 +47,7 @@ if __name__ == "__main__":
|
|
|
47
47
|
from neverlib.vad.PreProcess import HPFilter, volume_norm
|
|
48
48
|
|
|
49
49
|
sr = 16000
|
|
50
|
-
wav_path = "../../
|
|
50
|
+
wav_path = "../../data/vad_example.wav"
|
|
51
51
|
wav, wav_sr = sf.read(wav_path, always_2d=False, dtype="float32")
|
|
52
52
|
assert wav_sr == sr, f"音频采样率为{wav_sr}, 期望{sr}"
|
|
53
53
|
wav = HPFilter(wav, sr=sr, order=6, cutoff=100)
|
neverlib/vad/VAD_whisper.py
CHANGED
|
@@ -43,7 +43,7 @@ if __name__ == "__main__":
|
|
|
43
43
|
from neverlib.vad.PreProcess import HPFilter, volume_norm
|
|
44
44
|
|
|
45
45
|
sr = 16000
|
|
46
|
-
wav_path = "../../
|
|
46
|
+
wav_path = "../../data/vad_example.wav"
|
|
47
47
|
wav, wav_sr = sf.read(wav_path, always_2d=False, dtype="float32")
|
|
48
48
|
assert wav_sr == sr, f"音频采样率为{wav_sr}, 期望{sr}"
|
|
49
49
|
wav = HPFilter(wav, sr=sr, order=6, cutoff=100)
|