neverlib 0.2.3__py3-none-any.whl → 0.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- neverlib/.history/Docs/audio_aug/test_snr_20250806011311.py +0 -0
- neverlib/.history/Docs/audio_aug/test_snr_20250806011331.py +75 -0
- neverlib/.history/Docs/audio_aug/test_snr_20250806011342.py +57 -0
- neverlib/.history/Docs/audio_aug/test_snr_20250806011352.py +57 -0
- neverlib/.history/Docs/audio_aug/test_snr_20250806011403.py +57 -0
- neverlib/.history/Docs/audio_aug/test_snr_20250806011413.py +57 -0
- neverlib/.history/Docs/audio_aug/test_snr_20250806011435.py +55 -0
- neverlib/.history/Docs/vad/1_20250810032405.py +0 -0
- neverlib/.history/Docs/vad/1_20250810032417.py +39 -0
- neverlib/.history/audio_aug/audio_aug_20250806010451.py +125 -0
- neverlib/.history/audio_aug/audio_aug_20250806010750.py +138 -0
- neverlib/.history/audio_aug/audio_aug_20250806010759.py +140 -0
- neverlib/.history/audio_aug/audio_aug_20250806010803.py +140 -0
- neverlib/.history/audio_aug/audio_aug_20250806010809.py +140 -0
- neverlib/.history/audio_aug/audio_aug_20250806011108.py +140 -0
- neverlib/.history/dataAnalyze/__init___20250806204125.py +14 -0
- neverlib/.history/dataAnalyze/__init___20250806204139.py +14 -0
- neverlib/.history/dataAnalyze/__init___20250806204159.py +14 -0
- neverlib/.history/filter/__init___20250820103351.py +70 -0
- neverlib/.history/filter/__init___20250821102348.py +70 -0
- neverlib/.history/filter/__init___20250821102405.py +14 -0
- neverlib/.history/filter/auto_eq/__init___20250819213121.py +36 -0
- neverlib/.history/filter/auto_eq/__init___20250821102241.py +36 -0
- neverlib/.history/filter/auto_eq/__init___20250821102259.py +36 -0
- neverlib/.history/filter/auto_eq/__init___20250821102307.py +36 -0
- neverlib/.history/filter/auto_eq/__init___20250821102310.py +36 -0
- neverlib/.history/filter/auto_eq/__init___20250821102318.py +36 -0
- neverlib/.history/filter/auto_eq/__init___20250821102507.py +36 -0
- neverlib/{filter/AudoEQ/auto_eq_de.py → .history/filter/auto_eq/de_eq_20250820103848.py} +1 -1
- neverlib/.history/filter/auto_eq/de_eq_20250821102422.py +360 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820140732.py +75 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820140745.py +75 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820140816.py +75 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820140938.py +77 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820141003.py +77 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820141006.py +77 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820141019.py +77 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820141049.py +77 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820141211.py +77 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820141227.py +77 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820141311.py +78 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820141340.py +78 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820141712.py +78 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820141733.py +78 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820141755.py +78 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250821102434.py +76 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250821102500.py +76 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250821102502.py +76 -0
- neverlib/{filter/AudoEQ/auto_eq_ga_basic.py → .history/filter/auto_eq/ga_eq_basic_20250820102957.py} +1 -1
- neverlib/.history/filter/auto_eq/ga_eq_basic_20250820113054.py +380 -0
- neverlib/.history/filter/auto_eq/ga_eq_basic_20250820113150.py +380 -0
- neverlib/.history/filter/auto_eq/ga_eq_basic_20250820113520.py +385 -0
- neverlib/.history/filter/auto_eq/ga_eq_basic_20250820113525.py +385 -0
- neverlib/.history/filter/auto_eq/ga_eq_basic_20250821102212.py +385 -0
- neverlib/.history/metrics/dnsmos_20250806001612.py +160 -0
- neverlib/.history/metrics/dnsmos_20250815180659.py +160 -0
- neverlib/.history/metrics/dnsmos_20250815180701.py +158 -0
- neverlib/.history/metrics/dnsmos_20250815181321.py +154 -0
- neverlib/.history/metrics/dnsmos_20250815181327.py +154 -0
- neverlib/.history/metrics/dnsmos_20250815181331.py +154 -0
- neverlib/.history/metrics/dnsmos_20250815181620.py +154 -0
- neverlib/.history/metrics/dnsmos_20250815181631.py +154 -0
- neverlib/.history/metrics/dnsmos_20250815181742.py +154 -0
- neverlib/.history/metrics/dnsmos_20250815181824.py +153 -0
- neverlib/.history/metrics/dnsmos_20250815181834.py +153 -0
- neverlib/.history/metrics/dnsmos_20250815181922.py +153 -0
- neverlib/.history/metrics/dnsmos_20250815182011.py +147 -0
- neverlib/.history/metrics/dnsmos_20250815182036.py +144 -0
- neverlib/.history/metrics/dnsmos_20250815182936.py +143 -0
- neverlib/.history/metrics/dnsmos_20250815182942.py +143 -0
- neverlib/.history/metrics/dnsmos_20250815183032.py +137 -0
- neverlib/.history/metrics/dnsmos_20250815183101.py +144 -0
- neverlib/.history/metrics/dnsmos_20250815183121.py +144 -0
- neverlib/.history/metrics/dnsmos_20250815183123.py +143 -0
- neverlib/.history/metrics/dnsmos_20250815183214.py +143 -0
- neverlib/.history/metrics/dnsmos_20250815183240.py +143 -0
- neverlib/.history/metrics/dnsmos_20250815183248.py +144 -0
- neverlib/.history/metrics/dnsmos_20250815183407.py +142 -0
- neverlib/.history/metrics/dnsmos_20250815183409.py +142 -0
- neverlib/.history/metrics/dnsmos_20250815183431.py +142 -0
- neverlib/.history/metrics/dnsmos_20250815183507.py +140 -0
- neverlib/.history/metrics/dnsmos_20250815183513.py +139 -0
- neverlib/.history/metrics/dnsmos_20250815183618.py +139 -0
- neverlib/.history/metrics/dnsmos_20250815183709.py +140 -0
- neverlib/.history/metrics/dnsmos_20250815183756.py +137 -0
- neverlib/.history/metrics/dnsmos_20250815183815.py +128 -0
- neverlib/.history/metrics/dnsmos_20250815183827.py +129 -0
- neverlib/.history/metrics/dnsmos_20250815183913.py +117 -0
- neverlib/.history/metrics/dnsmos_20250815183914.py +117 -0
- neverlib/.history/metrics/dnsmos_20250815184003.py +118 -0
- neverlib/.history/metrics/dnsmos_20250815184040.py +118 -0
- neverlib/.history/metrics/dnsmos_20250815184049.py +118 -0
- neverlib/.history/metrics/dnsmos_20250815184104.py +117 -0
- neverlib/.history/metrics/dnsmos_20250815184200.py +117 -0
- neverlib/.history/metrics/lpc_lsp_metric_20250816015944.py +128 -0
- neverlib/.history/metrics/lpc_lsp_metric_20250816020142.py +128 -0
- neverlib/.history/metrics/lpc_lsp_metric_20250816020156.py +128 -0
- neverlib/.history/metrics/lpc_lsp_metric_20250816020554.py +130 -0
- neverlib/.history/metrics/lpc_lsp_metric_20250816020600.py +125 -0
- neverlib/.history/metrics/lpc_lsp_metric_20250816020631.py +120 -0
- neverlib/.history/metrics/lpc_lsp_metric_20250816020746.py +118 -0
- neverlib/.history/metrics/lpc_me_20250816013111.py +0 -0
- neverlib/.history/metrics/lpc_me_20250816013129.py +121 -0
- neverlib/.history/metrics/lpc_me_20250816015430.py +103 -0
- neverlib/.history/metrics/lpc_me_20250816015535.py +96 -0
- neverlib/.history/metrics/lpc_me_20250816015542.py +96 -0
- neverlib/.history/metrics/lpc_me_20250816015636.py +97 -0
- neverlib/.history/metrics/lpc_me_20250816015658.py +104 -0
- neverlib/.history/metrics/lpc_me_20250816015703.py +100 -0
- neverlib/.history/metrics/lpc_me_20250816015945.py +128 -0
- neverlib/.history/metrics/snr_20250806010538.py +177 -0
- neverlib/.history/metrics/snr_20250806211634.py +184 -0
- neverlib/.history/metrics/spec_20250805234209.py +45 -0
- neverlib/.history/metrics/spec_20250816135530.py +11 -0
- neverlib/.history/metrics/spec_20250816135654.py +16 -0
- neverlib/.history/metrics/spec_20250816135736.py +68 -0
- neverlib/.history/metrics/spec_20250816135904.py +75 -0
- neverlib/.history/metrics/spec_20250816135921.py +82 -0
- neverlib/.history/metrics/spec_20250816140111.py +82 -0
- neverlib/.history/metrics/spec_20250816140543.py +136 -0
- neverlib/.history/metrics/spec_20250816140559.py +172 -0
- neverlib/.history/metrics/spec_20250816140602.py +172 -0
- neverlib/.history/metrics/spec_20250816140608.py +172 -0
- neverlib/.history/metrics/spec_20250816140654.py +148 -0
- neverlib/.history/metrics/spec_20250816140705.py +144 -0
- neverlib/.history/metrics/spec_20250816140755.py +138 -0
- neverlib/.history/metrics/spec_20250816140823.py +170 -0
- neverlib/.history/metrics/spec_20250816140832.py +170 -0
- neverlib/.history/metrics/spec_20250816140833.py +170 -0
- neverlib/.history/metrics/spec_20250816140922.py +147 -0
- neverlib/.history/metrics/spec_20250816141148.py +107 -0
- neverlib/.history/metrics/spec_20250816141219.py +123 -0
- neverlib/.history/metrics/spec_20250816141732.py +178 -0
- neverlib/.history/metrics/spec_20250816141740.py +178 -0
- neverlib/.history/metrics/spec_20250816142030.py +178 -0
- neverlib/.history/metrics/spec_20250816142107.py +135 -0
- neverlib/.history/metrics/spec_20250816142126.py +135 -0
- neverlib/.history/metrics/spec_20250816142410.py +135 -0
- neverlib/.history/metrics/spec_20250816142415.py +136 -0
- neverlib/.history/metrics/spec_metric_20250816135156.py +0 -0
- neverlib/.history/metrics/spec_metric_20250816135226.py +5 -0
- neverlib/.history/metrics/spec_metric_20250816135227.py +10 -0
- neverlib/.history/metrics/spec_metric_20250816135306.py +15 -0
- neverlib/.history/metrics/spec_metric_20250816135442.py +31 -0
- neverlib/.history/metrics/spec_metric_20250816135448.py +31 -0
- neverlib/.history/metrics/spec_metric_20250816135520.py +29 -0
- neverlib/.history/metrics/spec_metric_20250816135537.py +63 -0
- neverlib/.history/metrics/spec_metric_20250816135653.py +65 -0
- neverlib/.history/vad/PreProcess_20250805234211.py +63 -0
- neverlib/.history/vad/PreProcess_20250809232455.py +63 -0
- neverlib/.history/vad/PreProcess_20250816020725.py +66 -0
- neverlib/.history/vad/VAD_Silero_20250805234211.py +50 -0
- neverlib/.history/vad/VAD_Silero_20250809232456.py +50 -0
- neverlib/.history/vad/VAD_WebRTC_20250805234211.py +61 -0
- neverlib/.history/vad/VAD_WebRTC_20250809232456.py +61 -0
- neverlib/.history/vad/VAD_funasr_20250805234211.py +54 -0
- neverlib/.history/vad/VAD_funasr_20250809232456.py +54 -0
- neverlib/.history/vad/VAD_vadlib_20250805234211.py +70 -0
- neverlib/.history/vad/VAD_vadlib_20250809232455.py +70 -0
- neverlib/.history/vad/VAD_whisper_20250805234211.py +55 -0
- neverlib/.history/vad/VAD_whisper_20250809232456.py +55 -0
- neverlib/.specstory/.what-is-this.md +69 -0
- neverlib/.specstory/history/2025-08-05_17-06Z-/350/277/231/344/270/200/346/255/245/347/232/204/347/233/256/347/232/204/346/230/257/344/273/200/344/271/210.md +424 -0
- neverlib/Docs/audio_aug/test_snr.py +55 -0
- neverlib/audio_aug/HarmonicDistortion.py +79 -0
- neverlib/audio_aug/TFDrop.py +41 -0
- neverlib/audio_aug/TFMask.py +56 -0
- neverlib/audio_aug/audio_aug.py +16 -1
- neverlib/audio_aug/clip_aug.py +41 -0
- neverlib/audio_aug/coder_aug.py +209 -0
- neverlib/audio_aug/coder_aug2.py +118 -0
- neverlib/audio_aug/loss_packet_aug.py +103 -0
- neverlib/audio_aug/quant_aug.py +78 -0
- neverlib/data_analyze/__init__.py +14 -0
- neverlib/filter/auto_eq/__init__.py +36 -0
- neverlib/filter/auto_eq/de_eq.py +360 -0
- neverlib/filter/auto_eq/freq_eq.py +76 -0
- neverlib/filter/{AudoEQ/auto_eq_ga_advanced.py → auto_eq/ga_eq_advanced.py} +1 -1
- neverlib/filter/auto_eq/ga_eq_basic.py +385 -0
- neverlib/metrics/dnsmos.py +58 -101
- neverlib/metrics/lpc_lsp.py +118 -0
- neverlib/metrics/snr.py +11 -4
- neverlib/metrics/spec.py +136 -45
- neverlib/utils/utils.py +17 -14
- neverlib/vad/PreProcess.py +5 -2
- neverlib/vad/VAD_Silero.py +1 -1
- neverlib/vad/VAD_WebRTC.py +1 -1
- neverlib/vad/VAD_funasr.py +1 -1
- neverlib/vad/VAD_vadlib.py +1 -1
- neverlib/vad/VAD_whisper.py +1 -1
- {neverlib-0.2.3.dist-info → neverlib-0.2.4.dist-info}/METADATA +1 -1
- neverlib-0.2.4.dist-info/RECORD +229 -0
- neverlib-0.2.3.dist-info/RECORD +0 -53
- /neverlib/{dataAnalyze/__init__.py → .history/dataAnalyze/__init___20250805234204.py} +0 -0
- /neverlib/{filter/AudoEQ/auto_eq_spectral_direct.py → .history/filter/auto_eq/freq_eq_20250805234206.py} +0 -0
- /neverlib/{dataAnalyze → data_analyze}/README.md +0 -0
- /neverlib/{dataAnalyze → data_analyze}/dataset_analyzer.py +0 -0
- /neverlib/{dataAnalyze → data_analyze}/quality_metrics.py +0 -0
- /neverlib/{dataAnalyze → data_analyze}/rms_distrubution.py +0 -0
- /neverlib/{dataAnalyze → data_analyze}/spectral_analysis.py +0 -0
- /neverlib/{dataAnalyze → data_analyze}/statistics.py +0 -0
- /neverlib/{dataAnalyze → data_analyze}/temporal_features.py +0 -0
- /neverlib/{dataAnalyze → data_analyze}/visualization.py +0 -0
- /neverlib/filter/{AudoEQ → auto_eq}/README.md +0 -0
- {neverlib-0.2.3.dist-info → neverlib-0.2.4.dist-info}/WHEEL +0 -0
- {neverlib-0.2.3.dist-info → neverlib-0.2.4.dist-info}/licenses/LICENSE +0 -0
- {neverlib-0.2.3.dist-info → neverlib-0.2.4.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
'''
|
|
2
|
+
功能描述
|
|
3
|
+
|
|
4
|
+
计算参考语音和测试语音之间的线性预测编码-线谱对(LPC-LSP)参
|
|
5
|
+
数失真度
|
|
6
|
+
|
|
7
|
+
主要组件
|
|
8
|
+
|
|
9
|
+
预处理函数:
|
|
10
|
+
- pre_emphasis(): 预加重滤波,增强高频成分
|
|
11
|
+
- framing(): 分帧处理并应用汉明窗
|
|
12
|
+
|
|
13
|
+
LPC分析:
|
|
14
|
+
- lpc_analysis(): 使用librosa.lpc进行线性预测分析
|
|
15
|
+
- lpc_to_lsp(): LPC系数转换为线谱对参数
|
|
16
|
+
|
|
17
|
+
距离计算:
|
|
18
|
+
- lsp_mse(): 计算LSP向量间的均方误差
|
|
19
|
+
- lpc_lsp_distance(): 主函数,返回平均失真度和逐帧失真列表
|
|
20
|
+
|
|
21
|
+
技术特点
|
|
22
|
+
|
|
23
|
+
- 使用soundfile读取音频(支持多种格式)
|
|
24
|
+
- librosa进行LPC分析(替代了自定义算法)
|
|
25
|
+
- 基于LSP的频域失真测量,对量化误差敏感度更低
|
|
26
|
+
- 逐帧分析捕捉语音时变特性
|
|
27
|
+
|
|
28
|
+
应用场景
|
|
29
|
+
|
|
30
|
+
语音编码器质量评估、语音增强效果测量、语音合成质量分析
|
|
31
|
+
'''
|
|
32
|
+
import numpy as np
|
|
33
|
+
import librosa
|
|
34
|
+
import soundfile as sf
|
|
35
|
+
|
|
36
|
+
def pre_emphasis(signal, coeff=0.97):
    """Apply the pre-emphasis filter y[n] = x[n] - coeff * x[n-1].

    Args:
        signal: 1-D array-like of samples.
        coeff: Pre-emphasis coefficient.

    Returns:
        Array with one output sample per input sample; the first sample is
        passed through unchanged. An empty input yields an empty array.
    """
    signal = np.asarray(signal)
    if signal.size == 0:
        # signal[0] below would raise IndexError on an empty input.
        return signal.copy()
    return np.append(signal[0], signal[1:] - coeff * signal[:-1])
|
|
39
|
+
|
|
40
|
+
def framing(signal, frame_size, frame_stride, fs):
    """Slice a signal into frames and apply a Hamming window to each frame.

    Args:
        signal: Samples to frame.
        frame_size: Frame duration; multiplied by ``fs`` to get samples.
        frame_stride: Hop duration; multiplied by ``fs`` to get samples.
        fs: Sample rate.

    Returns:
        The windowed frames produced by ``librosa.util.frame(..., axis=0)``.
    """
    win_len = int(round(frame_size * fs))
    hop = int(round(frame_stride * fs))

    # axis=0 puts the frame index first, so the window broadcasts over rows.
    sliced = librosa.util.frame(signal, frame_length=win_len, hop_length=hop, axis=0)
    return sliced * np.hamming(win_len)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def lpc_analysis(frame, order):
    """Return the LPC coefficients of one frame as computed by ``librosa.lpc``."""
    return librosa.lpc(frame, order=order)
|
|
59
|
+
|
|
60
|
+
def lpc_to_lsp(a, num_points=512):
    """Convert LPC coefficients to line spectral frequencies (LSFs).

    Builds the symmetric and antisymmetric polynomials
    P(z) = A(z) + z^-(p+1) A(1/z) and Q(z) = A(z) - z^-(p+1) A(1/z),
    divides out their fixed roots at z = +1 / z = -1, and returns the angles
    of the remaining roots that lie in the upper half plane.

    Args:
        a: LPC coefficients [a0, a1, ..., ap] of A(z) = sum_k a_k z^-k.
        num_points: Unused. Kept so existing callers keep working; the
            previous implementation sampled this many grid frequencies.

    Returns:
        Sorted 1-D array of angles in radians, each strictly inside (0, pi).
        Purely real roots contribute no angle, so fewer than p values can be
        returned for an ill-conditioned A(z).
    """
    a = np.asarray(a, dtype=np.float64).ravel()
    if a.size == 0:
        return np.zeros(0)
    p = a.size - 1

    # Both polynomials have degree p + 1, hence the extra zero coefficient.
    padded = np.append(a, 0.0)
    sym = padded + padded[::-1]
    antisym = padded - padded[::-1]

    # Remove the roots that are always present at z = -1 / z = +1 so that
    # only the informative conjugate pairs remain.
    if p % 2 == 0:
        sym = np.polydiv(sym, [1.0, 1.0])[0]
        antisym = np.polydiv(antisym, [1.0, -1.0])[0]
    else:
        antisym = np.polydiv(antisym, [1.0, 0.0, -1.0])[0]

    angles = np.angle(np.concatenate([np.roots(sym), np.roots(antisym)]))
    return np.sort(angles[(angles > 0) & (angles < np.pi)])
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def lpc_lsp_distance(ref_wav, test_wav, frame_size=0.025, frame_stride=0.01, order=12):
    """Compute the LPC-LSP distortion between a reference and a test recording.

    Args:
        ref_wav: Reference audio, passed to ``soundfile.read``.
        test_wav: Test audio, passed to ``soundfile.read``.
        frame_size: Frame duration; multiplied by the sample rate in ``framing``.
        frame_stride: Hop duration; multiplied by the sample rate in ``framing``.
        order: LPC order.

    Returns:
        Tuple ``(mean_distance, distances)`` where ``distances`` holds one
        LSP mean-squared error per compared frame.

    Raises:
        ValueError: If the two files have different sample rates.
    """
    ref_sig, fs_r = sf.read(ref_wav, dtype='float32')
    test_sig, fs_t = sf.read(test_wav, dtype='float32')

    # Frame lengths are derived from the sample rate, so frames taken from
    # files with different rates would not be comparable.
    if fs_r != fs_t:
        raise ValueError("采样率不一致!")

    # Multi-channel files are read as 2-D arrays; the processing below is
    # 1-D, so keep the first channel only.
    if ref_sig.ndim > 1:
        ref_sig = ref_sig[:, 0]
    if test_sig.ndim > 1:
        test_sig = test_sig[:, 0]

    ref_frames = framing(pre_emphasis(ref_sig), frame_size, frame_stride, fs_r)
    test_frames = framing(pre_emphasis(test_sig), frame_size, frame_stride, fs_t)

    distances = []
    # zip stops at the shorter recording, which aligns the frame counts.
    for ref_frame, test_frame in zip(ref_frames, test_frames):
        lsp_ref = lpc_to_lsp(lpc_analysis(ref_frame, order))
        lsp_test = lpc_to_lsp(lpc_analysis(test_frame, order))
        # Truncate to the common length before comparing.
        min_len = min(len(lsp_ref), len(lsp_test))
        distances.append(np.mean((lsp_ref[:min_len] - lsp_test[:min_len]) ** 2))

    return np.mean(distances), distances
|
|
119
|
+
|
|
120
|
+
if __name__ == "__main__":
    ref_file = "../data/vad_example.wav"  # path of the reference speech file
    test_file = "../data/vad_example.wav"  # path of the test speech file

    avg_dist, dist_list = lpc_lsp_distance(ref_file, test_file)
    print(f"平均 LSP MSE 失真: {avg_dist}")
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
'''
|
|
2
|
+
功能描述
|
|
3
|
+
|
|
4
|
+
计算参考语音和测试语音之间的线性预测编码-线谱对(LPC-LSP)参
|
|
5
|
+
数失真度
|
|
6
|
+
|
|
7
|
+
主要组件
|
|
8
|
+
|
|
9
|
+
预处理函数:
|
|
10
|
+
- pre_emphasis(): 预加重滤波,增强高频成分
|
|
11
|
+
- framing(): 分帧处理并应用汉明窗
|
|
12
|
+
|
|
13
|
+
LPC分析:
|
|
14
|
+
- lpc_analysis(): 使用librosa.lpc进行线性预测分析
|
|
15
|
+
- lpc_to_lsp(): LPC系数转换为线谱对参数
|
|
16
|
+
|
|
17
|
+
距离计算:
|
|
18
|
+
- lsp_mse(): 计算LSP向量间的均方误差
|
|
19
|
+
- lpc_lsp_distance(): 主函数,返回平均失真度和逐帧失真列表
|
|
20
|
+
|
|
21
|
+
技术特点
|
|
22
|
+
|
|
23
|
+
- 使用soundfile读取音频(支持多种格式)
|
|
24
|
+
- librosa进行LPC分析(替代了自定义算法)
|
|
25
|
+
- 基于LSP的频域失真测量,对量化误差敏感度更低
|
|
26
|
+
- 逐帧分析捕捉语音时变特性
|
|
27
|
+
|
|
28
|
+
应用场景
|
|
29
|
+
|
|
30
|
+
语音编码器质量评估、语音增强效果测量、语音合成质量分析
|
|
31
|
+
'''
|
|
32
|
+
import numpy as np
|
|
33
|
+
import librosa
|
|
34
|
+
import soundfile as sf
|
|
35
|
+
|
|
36
|
+
def pre_emphasis(signal, coeff=0.97):
    """Apply the pre-emphasis filter y[n] = x[n] - coeff * x[n-1].

    Args:
        signal: 1-D array-like of samples.
        coeff: Pre-emphasis coefficient.

    Returns:
        Array with one output sample per input sample; the first sample is
        passed through unchanged. An empty input yields an empty array.
    """
    signal = np.asarray(signal)
    if signal.size == 0:
        # signal[0] below would raise IndexError on an empty input.
        return signal.copy()
    return np.append(signal[0], signal[1:] - coeff * signal[:-1])
|
|
39
|
+
|
|
40
|
+
def framing(signal, frame_size, frame_stride, fs):
    """Slice a signal into frames and apply a Hamming window to each frame.

    Args:
        signal: Samples to frame.
        frame_size: Frame duration; multiplied by ``fs`` to get samples.
        frame_stride: Hop duration; multiplied by ``fs`` to get samples.
        fs: Sample rate.

    Returns:
        The windowed frames produced by ``librosa.util.frame(..., axis=0)``.
    """
    win_len = int(round(frame_size * fs))
    hop = int(round(frame_stride * fs))

    # axis=0 puts the frame index first, so the window broadcasts over rows.
    sliced = librosa.util.frame(signal, frame_length=win_len, hop_length=hop, axis=0)
    return sliced * np.hamming(win_len)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def lpc_to_lsp(a, num_points=512):
    """Convert LPC coefficients to line spectral frequencies (LSFs).

    Builds the symmetric and antisymmetric polynomials
    P(z) = A(z) + z^-(p+1) A(1/z) and Q(z) = A(z) - z^-(p+1) A(1/z),
    divides out their fixed roots at z = +1 / z = -1, and returns the angles
    of the remaining roots that lie in the upper half plane.

    Args:
        a: LPC coefficients [a0, a1, ..., ap] of A(z) = sum_k a_k z^-k.
        num_points: Unused. Kept so existing callers keep working; the
            previous implementation sampled this many grid frequencies.

    Returns:
        Sorted 1-D array of angles in radians, each strictly inside (0, pi).
        Purely real roots contribute no angle, so fewer than p values can be
        returned for an ill-conditioned A(z).
    """
    a = np.asarray(a, dtype=np.float64).ravel()
    if a.size == 0:
        return np.zeros(0)
    p = a.size - 1

    # Both polynomials have degree p + 1, hence the extra zero coefficient.
    padded = np.append(a, 0.0)
    sym = padded + padded[::-1]
    antisym = padded - padded[::-1]

    # Remove the roots that are always present at z = -1 / z = +1 so that
    # only the informative conjugate pairs remain.
    if p % 2 == 0:
        sym = np.polydiv(sym, [1.0, 1.0])[0]
        antisym = np.polydiv(antisym, [1.0, -1.0])[0]
    else:
        antisym = np.polydiv(antisym, [1.0, 0.0, -1.0])[0]

    angles = np.angle(np.concatenate([np.roots(sym), np.roots(antisym)]))
    return np.sort(angles[(angles > 0) & (angles < np.pi)])
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def lpc_lsp_distance(ref_wav, test_wav, frame_size=0.025, frame_stride=0.01, order=12):
    """Compute the LPC-LSP distortion between a reference and a test recording.

    Args:
        ref_wav: Reference audio, passed to ``soundfile.read``.
        test_wav: Test audio, passed to ``soundfile.read``.
        frame_size: Frame duration; multiplied by the sample rate in ``framing``.
        frame_stride: Hop duration; multiplied by the sample rate in ``framing``.
        order: LPC order passed to ``librosa.lpc``.

    Returns:
        Tuple ``(mean_distance, distances)`` where ``distances`` holds one
        LSP mean-squared error per compared frame.

    Raises:
        ValueError: If the two files have different sample rates.
    """
    ref_sig, fs_r = sf.read(ref_wav, dtype='float32')
    test_sig, fs_t = sf.read(test_wav, dtype='float32')

    # Frame lengths are derived from the sample rate, so frames taken from
    # files with different rates would not be comparable.
    if fs_r != fs_t:
        raise ValueError("采样率不一致!")

    # Multi-channel files are read as 2-D arrays; the processing below is
    # 1-D, so keep the first channel only.
    if ref_sig.ndim > 1:
        ref_sig = ref_sig[:, 0]
    if test_sig.ndim > 1:
        test_sig = test_sig[:, 0]

    ref_frames = framing(pre_emphasis(ref_sig), frame_size, frame_stride, fs_r)
    test_frames = framing(pre_emphasis(test_sig), frame_size, frame_stride, fs_t)

    distances = []
    # zip stops at the shorter recording, which aligns the frame counts.
    for ref_frame, test_frame in zip(ref_frames, test_frames):
        lsp_ref = lpc_to_lsp(librosa.lpc(ref_frame, order=order))
        lsp_test = lpc_to_lsp(librosa.lpc(test_frame, order=order))
        # Truncate to the common length before comparing.
        min_len = min(len(lsp_ref), len(lsp_test))
        distances.append(np.mean((lsp_ref[:min_len] - lsp_test[:min_len]) ** 2))

    return np.mean(distances), distances
|
|
114
|
+
|
|
115
|
+
if __name__ == "__main__":
    ref_file = "../data/vad_example.wav"  # path of the reference speech file
    test_file = "../data/vad_example.wav"  # path of the test speech file

    avg_dist, dist_list = lpc_lsp_distance(ref_file, test_file)
    print(f"平均 LSP MSE 失真: {avg_dist}")
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
'''
|
|
2
|
+
功能描述
|
|
3
|
+
|
|
4
|
+
计算参考语音和测试语音之间的线性预测编码-线谱对(LPC-LSP)参
|
|
5
|
+
数失真度
|
|
6
|
+
|
|
7
|
+
主要组件
|
|
8
|
+
|
|
9
|
+
预处理函数:
|
|
10
|
+
- pre_emphasis(): 预加重滤波,增强高频成分
|
|
11
|
+
- framing(): 分帧处理并应用汉明窗
|
|
12
|
+
|
|
13
|
+
LPC分析:
|
|
14
|
+
- lpc_analysis(): 使用librosa.lpc进行线性预测分析
|
|
15
|
+
- lpc_to_lsp(): LPC系数转换为线谱对参数
|
|
16
|
+
|
|
17
|
+
距离计算:
|
|
18
|
+
- lsp_mse(): 计算LSP向量间的均方误差
|
|
19
|
+
- lpc_lsp_distance(): 主函数,返回平均失真度和逐帧失真列表
|
|
20
|
+
|
|
21
|
+
技术特点
|
|
22
|
+
|
|
23
|
+
- 使用soundfile读取音频(支持多种格式)
|
|
24
|
+
- librosa进行LPC分析(替代了自定义算法)
|
|
25
|
+
- 基于LSP的频域失真测量,对量化误差敏感度更低
|
|
26
|
+
- 逐帧分析捕捉语音时变特性
|
|
27
|
+
|
|
28
|
+
应用场景
|
|
29
|
+
|
|
30
|
+
语音编码器质量评估、语音增强效果测量、语音合成质量分析
|
|
31
|
+
'''
|
|
32
|
+
import numpy as np
|
|
33
|
+
import librosa
|
|
34
|
+
import soundfile as sf
|
|
35
|
+
from neverlib.vad.PreProcess import pre_emphasis
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def framing(signal, frame_size, frame_stride, fs):
    """Slice a signal into frames and apply a Hamming window to each frame.

    Args:
        signal: Samples to frame.
        frame_size: Frame duration; multiplied by ``fs`` to get samples.
        frame_stride: Hop duration; multiplied by ``fs`` to get samples.
        fs: Sample rate.

    Returns:
        The windowed frames produced by ``librosa.util.frame(..., axis=0)``.
    """
    win_len = int(round(frame_size * fs))
    hop = int(round(frame_stride * fs))

    # axis=0 puts the frame index first, so the window broadcasts over rows.
    sliced = librosa.util.frame(signal, frame_length=win_len, hop_length=hop, axis=0)
    return sliced * np.hamming(win_len)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def lpc_to_lsp(a, num_points=512):
    """Convert LPC coefficients to line spectral frequencies (LSFs).

    Builds the symmetric and antisymmetric polynomials
    P(z) = A(z) + z^-(p+1) A(1/z) and Q(z) = A(z) - z^-(p+1) A(1/z),
    divides out their fixed roots at z = +1 / z = -1, and returns the angles
    of the remaining roots that lie in the upper half plane.

    Args:
        a: LPC coefficients [a0, a1, ..., ap] of A(z) = sum_k a_k z^-k.
        num_points: Unused. Kept so existing callers keep working; the
            previous implementation sampled this many grid frequencies.

    Returns:
        Sorted 1-D array of angles in radians, each strictly inside (0, pi).
        Purely real roots contribute no angle, so fewer than p values can be
        returned for an ill-conditioned A(z).
    """
    a = np.asarray(a, dtype=np.float64).ravel()
    if a.size == 0:
        return np.zeros(0)
    p = a.size - 1

    # Both polynomials have degree p + 1, hence the extra zero coefficient.
    padded = np.append(a, 0.0)
    sym = padded + padded[::-1]
    antisym = padded - padded[::-1]

    # Remove the roots that are always present at z = -1 / z = +1 so that
    # only the informative conjugate pairs remain.
    if p % 2 == 0:
        sym = np.polydiv(sym, [1.0, 1.0])[0]
        antisym = np.polydiv(antisym, [1.0, -1.0])[0]
    else:
        antisym = np.polydiv(antisym, [1.0, 0.0, -1.0])[0]

    angles = np.angle(np.concatenate([np.roots(sym), np.roots(antisym)]))
    return np.sort(angles[(angles > 0) & (angles < np.pi)])
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def lpc_lsp_distance(ref_wav, test_wav, frame_size=0.025, frame_stride=0.01, order=12):
    """Compute the LPC-LSP distortion between a reference and a test recording.

    Args:
        ref_wav: Reference audio, passed to ``soundfile.read``.
        test_wav: Test audio, passed to ``soundfile.read``.
        frame_size: Frame duration; multiplied by the sample rate in ``framing``.
        frame_stride: Hop duration; multiplied by the sample rate in ``framing``.
        order: LPC order passed to ``librosa.lpc``.

    Returns:
        Tuple ``(mean_distance, distances)`` where ``distances`` holds one
        LSP mean-squared error per compared frame.

    Raises:
        ValueError: If the two files have different sample rates.
    """
    ref_sig, fs_r = sf.read(ref_wav, dtype='float32')
    test_sig, fs_t = sf.read(test_wav, dtype='float32')

    # Frame lengths are derived from the sample rate, so frames taken from
    # files with different rates would not be comparable.
    if fs_r != fs_t:
        raise ValueError("采样率不一致!")

    # Multi-channel files are read as 2-D arrays; the processing below is
    # 1-D, so keep the first channel only.
    if ref_sig.ndim > 1:
        ref_sig = ref_sig[:, 0]
    if test_sig.ndim > 1:
        test_sig = test_sig[:, 0]

    ref_frames = framing(pre_emphasis(ref_sig), frame_size, frame_stride, fs_r)
    test_frames = framing(pre_emphasis(test_sig), frame_size, frame_stride, fs_t)

    distances = []
    # zip stops at the shorter recording, which aligns the frame counts.
    for ref_frame, test_frame in zip(ref_frames, test_frames):
        lsp_ref = lpc_to_lsp(librosa.lpc(ref_frame, order=order))
        lsp_test = lpc_to_lsp(librosa.lpc(test_frame, order=order))
        # Truncate to the common length before comparing.
        min_len = min(len(lsp_ref), len(lsp_test))
        distances.append(np.mean((lsp_ref[:min_len] - lsp_test[:min_len]) ** 2))

    return np.mean(distances), distances
|
|
112
|
+
|
|
113
|
+
if __name__ == "__main__":
    ref_file = "../data/vad_example.wav"  # path of the reference speech file
    test_file = "../data/vad_example.wav"  # path of the test speech file

    avg_dist, dist_list = lpc_lsp_distance(ref_file, test_file)
    print(f"平均 LSP MSE 失真: {avg_dist}")
|
|
File without changes
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import scipy.signal
|
|
3
|
+
import scipy.io.wavfile as wav
|
|
4
|
+
from python_speech_features import sigproc
|
|
5
|
+
|
|
6
|
+
def pre_emphasis(signal, coeff=0.97):
    """Apply the pre-emphasis filter y[n] = x[n] - coeff * x[n-1].

    Args:
        signal: 1-D array-like of samples.
        coeff: Pre-emphasis coefficient.

    Returns:
        Array with one output sample per input sample; the first sample is
        passed through unchanged. An empty input yields an empty array.
    """
    signal = np.asarray(signal)
    if signal.size == 0:
        # signal[0] below would raise IndexError on an empty input.
        return signal.copy()
    return np.append(signal[0], signal[1:] - coeff * signal[:-1])
|
|
9
|
+
|
|
10
|
+
def framing(signal, frame_size, frame_stride, fs):
    """Slice a signal into Hamming-windowed frames via ``sigproc.framesig``.

    Args:
        signal: Samples to frame.
        frame_size: Frame duration; multiplied by ``fs`` to get samples.
        frame_stride: Hop duration; multiplied by ``fs`` to get samples.
        fs: Sample rate.

    Returns:
        Whatever ``sigproc.framesig`` returns for the given frame length,
        step and ``np.hamming`` window function.
    """
    samples_per_frame = int(round(frame_size * fs))
    samples_per_hop = int(round(frame_stride * fs))
    return sigproc.framesig(signal, samples_per_frame, samples_per_hop, winfunc=np.hamming)
|
|
16
|
+
|
|
17
|
+
def levinson_durbin(r, order):
    """Solve for LPC coefficients with the Levinson-Durbin recursion.

    Args:
        r: Autocorrelation values for lags 0..order (at least order + 1 items).
        order: Prediction order.

    Returns:
        Tuple ``(a, e)``: ``a`` is the coefficient array of length
        ``order + 1`` with ``a[0] == 1.0`` and ``e`` is the final prediction
        error. If the error reaches zero (e.g. an all-zero frame) the
        recursion stops and the remaining coefficients stay 0.
    """
    a = np.zeros(order + 1)
    a[0] = 1.0
    e = float(r[0])

    for i in range(1, order + 1):
        if e <= 0.0:
            # Dividing by a zero error would fill the result with NaN/inf.
            break
        # r[i-1:0:-1] is r[i-1], ..., r[1], pairing a[j] with r[i-j].
        acc = r[i] + np.dot(a[1:i], r[i - 1:0:-1])
        k = -acc / e
        # The right-hand side is evaluated into a temporary before the
        # assignment, so the update uses the previous coefficients only.
        a[1:i] = a[1:i] + k * a[i - 1:0:-1]
        a[i] = k
        e *= 1.0 - k * k
    return a, e
|
|
35
|
+
|
|
36
|
+
def lpc_analysis(frame, order):
    """Run LPC analysis on one frame using its autocorrelation sequence.

    Args:
        frame: 1-D samples of a single frame.
        order: Prediction order.

    Returns:
        The coefficient array returned by ``levinson_durbin``.
    """
    n = len(frame)
    full_corr = np.correlate(frame, frame, mode='full')
    # In 'full' mode the zero lag sits at index n - 1; keep lags 0..order.
    lags = full_corr[n - 1:n + order]
    coeffs, _ = levinson_durbin(lags, order)
    return coeffs
|
|
42
|
+
|
|
43
|
+
def lpc_to_lsp(a, num_points=512):
    """Convert LPC coefficients to line spectral frequencies (LSFs).

    Builds the symmetric and antisymmetric polynomials
    P(z) = A(z) + z^-(p+1) A(1/z) and Q(z) = A(z) - z^-(p+1) A(1/z),
    divides out their fixed roots at z = +1 / z = -1, and returns the angles
    of the remaining roots that lie in the upper half plane.

    Args:
        a: LPC coefficients [a0, a1, ..., ap] of A(z) = sum_k a_k z^-k.
        num_points: Unused. Kept so existing callers keep working; the
            previous implementation sampled this many grid frequencies.

    Returns:
        Sorted 1-D array of angles in radians, each strictly inside (0, pi).
        Purely real roots contribute no angle, so fewer than p values can be
        returned for an ill-conditioned A(z).
    """
    a = np.asarray(a, dtype=np.float64).ravel()
    if a.size == 0:
        return np.zeros(0)
    p = a.size - 1

    # Both polynomials have degree p + 1, hence the extra zero coefficient.
    padded = np.append(a, 0.0)
    sym = padded + padded[::-1]
    antisym = padded - padded[::-1]

    # Remove the roots that are always present at z = -1 / z = +1 so that
    # only the informative conjugate pairs remain.
    if p % 2 == 0:
        sym = np.polydiv(sym, [1.0, 1.0])[0]
        antisym = np.polydiv(antisym, [1.0, -1.0])[0]
    else:
        antisym = np.polydiv(antisym, [1.0, 0.0, -1.0])[0]

    angles = np.angle(np.concatenate([np.roots(sym), np.roots(antisym)]))
    return np.sort(angles[(angles > 0) & (angles < np.pi)])
|
|
69
|
+
|
|
70
|
+
def lsp_mse(lsp1, lsp2):
    """Return the mean squared difference between two LSP vectors.

    Args:
        lsp1: First LSP vector (array-like).
        lsp2: Second LSP vector (array-like), broadcastable against ``lsp1``.

    Returns:
        Mean of the element-wise squared differences.
    """
    # asarray lets plain lists and tuples be passed as well as arrays.
    diff = np.asarray(lsp1) - np.asarray(lsp2)
    return np.mean(diff ** 2)
|
|
73
|
+
|
|
74
|
+
def lpc_lsp_distance(ref_wav, test_wav, frame_size=0.025, frame_stride=0.01, order=12):
    """Compute the LPC-LSP distortion between a reference and a test WAV file.

    Args:
        ref_wav: Reference file, read with ``scipy.io.wavfile.read``.
        test_wav: Test file, read with ``scipy.io.wavfile.read``.
        frame_size: Frame duration; multiplied by the sample rate in ``framing``.
        frame_stride: Hop duration; multiplied by the sample rate in ``framing``.
        order: LPC order.

    Returns:
        Tuple ``(mean_distance, distances)`` with one LSP mean-squared error
        per compared frame.

    Raises:
        ValueError: If the two files have different sample rates.
    """
    fs_r, ref_sig = wav.read(ref_wav)
    fs_t, test_sig = wav.read(test_wav)
    if fs_r != fs_t:
        raise ValueError("采样率不一致!")

    # First channel only, converted to float64.
    ref_mono = (ref_sig[:, 0] if ref_sig.ndim > 1 else ref_sig).astype(np.float64)
    test_mono = (test_sig[:, 0] if test_sig.ndim > 1 else test_sig).astype(np.float64)

    ref_emph = pre_emphasis(ref_mono)
    test_emph = pre_emphasis(test_mono)

    ref_frames = framing(ref_emph, frame_size, frame_stride, fs_r)
    test_frames = framing(test_emph, frame_size, frame_stride, fs_t)

    distances = []
    # zip stops at the shorter sequence, i.e. the common number of frames.
    for ref_frame, test_frame in zip(ref_frames, test_frames):
        a_ref = lpc_analysis(ref_frame, order)
        a_test = lpc_analysis(test_frame, order)
        lsp_ref = lpc_to_lsp(a_ref)
        lsp_test = lpc_to_lsp(a_test)
        # Truncate both vectors to the common length before comparing.
        common = min(len(lsp_ref), len(lsp_test))
        distances.append(lsp_mse(lsp_ref[:common], lsp_test[:common]))

    return np.mean(distances), distances
|
|
115
|
+
|
|
116
|
+
if __name__ == "__main__":
    ref_file = "ref.wav"  # path of the reference speech file
    test_file = "test.wav"  # path of the test speech file

    avg_dist, dist_list = lpc_lsp_distance(ref_file, test_file)
    print(f"平均 LSP MSE 失真: {avg_dist}")
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import librosa
|
|
3
|
+
|
|
4
|
+
def pre_emphasis(signal, coeff=0.97):
    """Apply the pre-emphasis filter y[n] = x[n] - coeff * x[n-1].

    Args:
        signal: 1-D array-like of samples.
        coeff: Pre-emphasis coefficient.

    Returns:
        Array with one output sample per input sample; the first sample is
        passed through unchanged. An empty input yields an empty array.
    """
    signal = np.asarray(signal)
    if signal.size == 0:
        # signal[0] below would raise IndexError on an empty input.
        return signal.copy()
    return np.append(signal[0], signal[1:] - coeff * signal[:-1])
|
|
7
|
+
|
|
8
|
+
def framing(signal, frame_size, frame_stride, fs):
    """Slice a signal into frames and apply a Hamming window to each frame.

    Args:
        signal: Samples to frame.
        frame_size: Frame duration; multiplied by ``fs`` to get samples.
        frame_stride: Hop duration; multiplied by ``fs`` to get samples.
        fs: Sample rate.

    Returns:
        The windowed frames, indexed by frame along the first axis.
    """
    frame_length = int(round(frame_size * fs))
    frame_step = int(round(frame_stride * fs))

    # axis=0 already yields (num_frames, frame_length) for a 1-D signal.
    frames = librosa.util.frame(signal, frame_length=frame_length, hop_length=frame_step, axis=0)

    # Multiply without transposing: the window has frame_length entries and
    # must broadcast along the last axis. Transposing first would either fail
    # to broadcast or window across frames instead of within them.
    hamming_window = np.hamming(frame_length)
    frames = frames * hamming_window

    return frames
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def lpc_analysis(frame, order):
    """Return the LPC coefficients of one frame as computed by ``librosa.lpc``."""
    return librosa.lpc(frame, order=order)
|
|
27
|
+
|
|
28
|
+
def lpc_to_lsp(a, num_points=512):
    """Convert LPC coefficients to line spectral frequencies (LSFs).

    Builds the symmetric and antisymmetric polynomials
    P(z) = A(z) + z^-(p+1) A(1/z) and Q(z) = A(z) - z^-(p+1) A(1/z),
    divides out their fixed roots at z = +1 / z = -1, and returns the angles
    of the remaining roots that lie in the upper half plane.

    Args:
        a: LPC coefficients [a0, a1, ..., ap] of A(z) = sum_k a_k z^-k.
        num_points: Unused. Kept so existing callers keep working; the
            previous implementation sampled this many grid frequencies.

    Returns:
        Sorted 1-D array of angles in radians, each strictly inside (0, pi).
        Purely real roots contribute no angle, so fewer than p values can be
        returned for an ill-conditioned A(z).
    """
    a = np.asarray(a, dtype=np.float64).ravel()
    if a.size == 0:
        return np.zeros(0)
    p = a.size - 1

    # Both polynomials have degree p + 1, hence the extra zero coefficient.
    padded = np.append(a, 0.0)
    sym = padded + padded[::-1]
    antisym = padded - padded[::-1]

    # Remove the roots that are always present at z = -1 / z = +1 so that
    # only the informative conjugate pairs remain.
    if p % 2 == 0:
        sym = np.polydiv(sym, [1.0, 1.0])[0]
        antisym = np.polydiv(antisym, [1.0, -1.0])[0]
    else:
        antisym = np.polydiv(antisym, [1.0, 0.0, -1.0])[0]

    angles = np.angle(np.concatenate([np.roots(sym), np.roots(antisym)]))
    return np.sort(angles[(angles > 0) & (angles < np.pi)])
|
|
54
|
+
|
|
55
|
+
def lsp_mse(lsp1, lsp2):
    """Return the mean squared difference between two LSP vectors.

    Args:
        lsp1: First LSP vector (array-like).
        lsp2: Second LSP vector (array-like), broadcastable against ``lsp1``.

    Returns:
        Mean of the element-wise squared differences.
    """
    # asarray lets plain lists and tuples be passed as well as arrays.
    diff = np.asarray(lsp1) - np.asarray(lsp2)
    return np.mean(diff ** 2)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def lpc_lsp_distance(ref_wav, test_wav, frame_size=0.025, frame_stride=0.01, order=12):
    """Compute the frame-wise LPC/LSP distortion between two audio files.

    Both files are loaded with ``librosa.load(..., sr=None)``, pre-emphasised,
    framed and windowed; each pair of frames is then compared through the
    mean squared error of its LSP vectors.

    Args:
        ref_wav: Path of the reference recording.
        test_wav: Path of the recording under test.
        frame_size: Frame duration in seconds.
        frame_stride: Hop between frames in seconds.
        order: LPC order.

    Returns:
        Tuple ``(mean_distance, distances)`` where ``distances`` is the list
        of per-frame MSE values.

    Raises:
        ValueError: If the two files have different sampling rates.
    """
    reference, ref_rate = librosa.load(ref_wav, sr=None)
    degraded, test_rate = librosa.load(test_wav, sr=None)

    if ref_rate != test_rate:
        raise ValueError("采样率不一致!")

    # Promote the loaded samples to float64, then pre-emphasise.
    reference = pre_emphasis(reference.astype(np.float64))
    degraded = pre_emphasis(degraded.astype(np.float64))

    ref_frames = framing(reference, frame_size, frame_stride, ref_rate)
    test_frames = framing(degraded, frame_size, frame_stride, test_rate)

    # zip stops at the shorter sequence, so surplus frames of the longer
    # signal are ignored.
    distances = []
    for ref_frame, test_frame in zip(ref_frames, test_frames):
        ref_lpc = lpc_analysis(ref_frame, order)
        test_lpc = lpc_analysis(test_frame, order)
        ref_lsp = lpc_to_lsp(ref_lpc)
        test_lsp = lpc_to_lsp(test_lpc)
        # Compare only the leading entries both vectors have.
        shared = min(len(ref_lsp), len(test_lsp))
        distances.append(lsp_mse(ref_lsp[:shared], test_lsp[:shared]))

    return np.mean(distances), distances
|
|
97
|
+
|
|
98
|
+
if __name__ == "__main__":
    # Paths of the reference recording and of the recording under test.
    ref_file, test_file = "ref.wav", "test.wav"

    avg_dist, dist_list = lpc_lsp_distance(ref_file, test_file)
    print(f"平均 LSP MSE 失真: {avg_dist}")
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import librosa
|
|
3
|
+
|
|
4
|
+
def pre_emphasis(signal, coeff=0.97):
    """Apply a first-order pre-emphasis filter to a signal.

    Computes y[0] = x[0] and y[n] = x[n] - coeff * x[n - 1] for n >= 1.

    Args:
        signal: Array-like of samples (NumPy array, list or tuple).
        coeff: Pre-emphasis coefficient.

    Returns:
        NumPy array with the filtered samples; an empty array when
        ``signal`` is empty.
    """
    signal = np.asarray(signal)
    if signal.size == 0:
        # Nothing to filter; indexing signal[0] below would raise IndexError.
        return np.array([], dtype=signal.dtype)
    return np.append(signal[0], signal[1:] - coeff * signal[:-1])
|
|
7
|
+
|
|
8
|
+
def framing(signal, frame_size, frame_stride, fs):
    """Split a signal into overlapping frames and apply a Hamming window.

    Args:
        signal: 1-D array of samples.
        frame_size: Frame duration in seconds.
        frame_stride: Hop between consecutive frame starts in seconds.
        fs: Sampling rate in Hz.

    Returns:
        Array of shape (n_frames, frame_length) holding one windowed frame
        per row.
    """
    frame_length = int(round(frame_size * fs))
    frame_step = int(round(frame_stride * fs))

    # With axis=0 librosa returns one frame per row: (n_frames, frame_length).
    frames = librosa.util.frame(signal, frame_length=frame_length, hop_length=frame_step, axis=0)

    # The window broadcasts along the trailing sample axis. Transposing
    # first (as the code used to do) puts the frame index last, so the
    # multiplication fails to broadcast and callers would no longer get
    # one frame per row.
    return frames * np.hamming(frame_length)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def lpc_analysis(frame, order):
    """Estimate the LPC coefficients of a single frame.

    Delegates to ``librosa.lpc`` with the requested order and returns its
    result unchanged.
    """
    return librosa.lpc(frame, order=order)
|
|
27
|
+
|
|
28
|
+
def lpc_to_lsp(a, num_points=512):
    """Convert LPC coefficients to line spectral pair (LSP) frequencies.

    For A(z) = a[0] + a[1] z^-1 + ... + a[p] z^-p the LSP frequencies are
    the angles of the unit-circle roots of

        P(z) = A(z) + z^-(p+1) * A(1/z)   (symmetric)
        Q(z) = A(z) - z^-(p+1) * A(1/z)   (antisymmetric)

    The roots are obtained directly with ``np.roots`` instead of scanning a
    frequency grid for sign changes, so closely spaced frequencies are not
    missed.

    Args:
        a: LPC polynomial coefficients ``[a0, a1, ..., ap]``.
        num_points: Ignored. Kept only so that existing calls that pass it
            keep working; the root-based computation needs no grid.

    Returns:
        Sorted 1-D float array of LSP frequencies in radians, strictly
        between 0 and pi. For a minimum-phase A(z) it has exactly ``p``
        entries. Empty when fewer than two coefficients are given.
    """
    a = np.asarray(a, dtype=np.float64).ravel()
    p = a.size - 1
    if p < 1:
        return np.array([], dtype=np.float64)

    # Both polynomials have order p + 1, hence the extra zero coefficient.
    a_ext = np.append(a, 0.0)
    sym = a_ext + a_ext[::-1]
    antisym = a_ext - a_ext[::-1]

    # Divide out the trivial roots at z = -1 / z = +1 so that only the
    # complex-conjugate pairs carrying the LSP frequencies remain.
    if p % 2 == 0:
        sym, _ = np.polydiv(sym, [1.0, 1.0])
        antisym, _ = np.polydiv(antisym, [1.0, -1.0])
    else:
        antisym, _ = np.polydiv(antisym, [1.0, 0.0, -1.0])

    angles = np.angle(np.concatenate([np.roots(sym), np.roots(antisym)]))
    # Keep one member of each conjugate pair (the one in the upper half plane).
    return np.sort(angles[(angles > 0) & (angles < np.pi)])
|
|
54
|
+
|
|
55
|
+
def lsp_mse(lsp1, lsp2):
    """Return the mean squared error between two LSP vectors.

    Args:
        lsp1: Array-like of LSP frequencies (NumPy array, list or tuple).
        lsp2: Array-like of the same length as ``lsp1``.

    Returns:
        Mean of the squared element-wise differences. NumPy yields NaN
        (with a RuntimeWarning) when both inputs are empty.
    """
    # Converting to float64 lets plain Python sequences through and avoids
    # wrap-around when the inputs are unsigned integers.
    diff = np.asarray(lsp1, dtype=np.float64) - np.asarray(lsp2, dtype=np.float64)
    return np.mean(diff ** 2)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def lpc_lsp_distance(ref_wav, test_wav, frame_size=0.025, frame_stride=0.01, order=12):
    """Compute the frame-wise LPC/LSP distortion between two audio files.

    Both files are loaded with ``librosa.load(..., sr=None)``, pre-emphasised,
    framed and windowed; each pair of frames is then compared through the
    mean squared error of its LSP vectors.

    Args:
        ref_wav: Path of the reference recording.
        test_wav: Path of the recording under test.
        frame_size: Frame duration in seconds.
        frame_stride: Hop between frames in seconds.
        order: LPC order.

    Returns:
        Tuple ``(mean_distance, distances)`` where ``distances`` is the list
        of per-frame MSE values.

    Raises:
        ValueError: If the two files have different sampling rates.
    """
    ref_sig, fs_r = librosa.load(ref_wav, sr=None)
    test_sig, fs_t = librosa.load(test_wav, sr=None)

    # Frames of different sampling rates differ in sample count and map the
    # same LSP angle to different physical frequencies, so a frame-by-frame
    # comparison would be meaningless. Fail loudly instead.
    if fs_r != fs_t:
        raise ValueError("采样率不一致!")

    # Pre-emphasis
    ref_sig = pre_emphasis(ref_sig)
    test_sig = pre_emphasis(test_sig)

    # Framing + windowing
    ref_frames = framing(ref_sig, frame_size, frame_stride, fs_r)
    test_frames = framing(test_sig, frame_size, frame_stride, fs_t)

    # zip stops at the shorter sequence, so surplus frames of the longer
    # signal are ignored.
    distances = []
    for ref_frame, test_frame in zip(ref_frames, test_frames):
        a_ref = lpc_analysis(ref_frame, order)
        a_test = lpc_analysis(test_frame, order)
        lsp_ref = lpc_to_lsp(a_ref)
        lsp_test = lpc_to_lsp(a_test)
        # Compare only the leading entries both vectors have.
        min_len = min(len(lsp_ref), len(lsp_test))
        distances.append(lsp_mse(lsp_ref[:min_len], lsp_test[:min_len]))

    return np.mean(distances), distances
|
|
90
|
+
|
|
91
|
+
if __name__ == "__main__":
    # Paths of the reference recording and of the recording under test.
    ref_file, test_file = "ref.wav", "test.wav"

    avg_dist, dist_list = lpc_lsp_distance(ref_file, test_file)
    print(f"平均 LSP MSE 失真: {avg_dist}")
|