neverlib 0.2.9__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- neverlib/Docs/audio_aug/test_volume.ipynb +8 -8
- neverlib/Docs/filter/biquad.ipynb +1 -1
- neverlib/Docs/filter/filter_family.ipynb +4 -4
- neverlib/Docs/vad/VAD_WebRTC.ipynb +4 -4
- neverlib/Docs/vad/VAD_whisper.ipynb +2 -2
- neverlib/LLM/__init__.py +37 -0
- neverlib/LLM/bailian.py +342 -0
- neverlib/LLM/image.py +73 -0
- neverlib/LLM/text.py +32 -0
- neverlib/QA/ImpactNoiseRejection.py +4 -4
- neverlib/QA/gen_init.py +13 -16
- neverlib/__init__.py +5 -5
- neverlib/audio_aug/HarmonicDistortion.py +11 -11
- neverlib/audio_aug/__init__.py +54 -0
- neverlib/audio_aug/audio_aug.py +18 -18
- neverlib/audio_aug/coder_aug.py +25 -25
- neverlib/audio_aug/coder_aug2.py +10 -10
- neverlib/audio_aug/loss_packet_aug.py +16 -16
- neverlib/audio_aug/quant_aug.py +7 -7
- neverlib/data_analyze/README.md +1 -1
- neverlib/data_analyze/__init__.py +44 -0
- neverlib/data_analyze/dataset_analyzer.py +2 -2
- neverlib/data_analyze/quality_metrics.py +12 -12
- neverlib/data_analyze/statistics.py +1 -1
- neverlib/data_analyze/visualization.py +1 -1
- neverlib/filter/README.md +3 -3
- neverlib/filter/__init__.py +23 -0
- neverlib/filter/auto_eq/README.md +2 -2
- neverlib/filter/auto_eq/__init__.py +36 -0
- neverlib/filter/auto_eq/de_eq.py +1 -1
- neverlib/filter/auto_eq/ga_eq_advanced.py +2 -2
- neverlib/filter/auto_eq/ga_eq_basic.py +1 -1
- neverlib/filter/biquad.py +1 -1
- neverlib/metrics/__init__.py +36 -0
- neverlib/metrics/dnsmos.py +2 -2
- neverlib/metrics/lpc_lsp.py +8 -8
- neverlib/metrics/snr.py +5 -5
- neverlib/metrics/spec.py +23 -23
- neverlib/metrics/test_pesq.py +3 -3
- neverlib/tests/__init__.py +17 -0
- neverlib/tests/test_imports.py +1 -1
- neverlib/utils/README.md +1 -1
- neverlib/utils/__init__.py +53 -1
- neverlib/utils/audio_split.py +1 -1
- neverlib/utils/checkGPU.py +2 -2
- neverlib/utils/floder.py +6 -6
- neverlib/utils/lazy_expose.py +1 -1
- neverlib/utils/lazy_module.py +6 -6
- neverlib/utils/message.py +2 -3
- neverlib/utils/utils.py +108 -2
- neverlib/vad/README.md +5 -5
- neverlib/vad/__init__.py +38 -0
- neverlib/vad/utils.py +1 -1
- {neverlib-0.2.9.dist-info → neverlib-0.3.0.dist-info}/METADATA +3 -3
- {neverlib-0.2.9.dist-info → neverlib-0.3.0.dist-info}/RECORD +58 -57
- neverlib/QA/impact_noise_rejection.png +0 -0
- neverlib/QA/out.pcm +0 -0
- neverlib/QA/out.wav +0 -0
- {neverlib-0.2.9.dist-info → neverlib-0.3.0.dist-info}/WHEEL +0 -0
- {neverlib-0.2.9.dist-info → neverlib-0.3.0.dist-info}/licenses/LICENSE +0 -0
- {neverlib-0.2.9.dist-info → neverlib-0.3.0.dist-info}/top_level.txt +0 -0
|
@@ -32,12 +32,12 @@ class QualityAnalyzer:
|
|
|
32
32
|
|
|
33
33
|
Args:
|
|
34
34
|
signal_audio: 含有信号和噪声的音频
|
|
35
|
-
noise_audio:
|
|
36
|
-
signal_start:
|
|
37
|
-
signal_end:
|
|
35
|
+
noise_audio: 纯噪声音频(可选)
|
|
36
|
+
signal_start: 信号开始位置(当噪声未单独提供时使用)
|
|
37
|
+
signal_end: 信号结束位置(当噪声未单独提供时使用)
|
|
38
38
|
|
|
39
39
|
Returns:
|
|
40
|
-
SNR
|
|
40
|
+
SNR值(dB)
|
|
41
41
|
"""
|
|
42
42
|
if noise_audio is not None:
|
|
43
43
|
# 如果提供了噪声音频
|
|
@@ -72,7 +72,7 @@ class QualityAnalyzer:
|
|
|
72
72
|
|
|
73
73
|
Args:
|
|
74
74
|
audio: 音频信号
|
|
75
|
-
fundamental_freq:
|
|
75
|
+
fundamental_freq: 基频(Hz), 如果不提供则自动检测
|
|
76
76
|
num_harmonics: 考虑的谐波数量
|
|
77
77
|
|
|
78
78
|
Returns:
|
|
@@ -127,7 +127,7 @@ class QualityAnalyzer:
|
|
|
127
127
|
percentile_high: 高百分位数
|
|
128
128
|
|
|
129
129
|
Returns:
|
|
130
|
-
|
|
130
|
+
动态范围(dB)
|
|
131
131
|
"""
|
|
132
132
|
amplitude = np.abs(audio)
|
|
133
133
|
amplitude = amplitude[amplitude > 0] # 避免log(0)
|
|
@@ -148,7 +148,7 @@ class QualityAnalyzer:
|
|
|
148
148
|
|
|
149
149
|
Args:
|
|
150
150
|
audio: 测试音频信号
|
|
151
|
-
reference_audio:
|
|
151
|
+
reference_audio: 参考音频信号(可选)
|
|
152
152
|
|
|
153
153
|
Returns:
|
|
154
154
|
(频率数组, 幅度响应数组)
|
|
@@ -187,11 +187,11 @@ class QualityAnalyzer:
|
|
|
187
187
|
|
|
188
188
|
def loudness_range(self, audio: np.ndarray, gate_threshold: float = -70) -> dict:
|
|
189
189
|
"""
|
|
190
|
-
|
|
190
|
+
计算响度范围(基于EBU R128标准的简化版本)
|
|
191
191
|
|
|
192
192
|
Args:
|
|
193
193
|
audio: 音频信号
|
|
194
|
-
gate_threshold:
|
|
194
|
+
gate_threshold: 门限阈值(dB)
|
|
195
195
|
|
|
196
196
|
Returns:
|
|
197
197
|
响度统计信息字典
|
|
@@ -203,7 +203,7 @@ class QualityAnalyzer:
|
|
|
203
203
|
blocks = []
|
|
204
204
|
for i in range(0, len(audio) - block_size, hop_size):
|
|
205
205
|
block = audio[i:i + block_size]
|
|
206
|
-
#
|
|
206
|
+
# 简化的响度计算(使用RMS近似)
|
|
207
207
|
rms = np.sqrt(np.mean(block ** 2))
|
|
208
208
|
if rms > 0:
|
|
209
209
|
loudness = 20 * np.log10(rms)
|
|
@@ -235,7 +235,7 @@ class QualityAnalyzer:
|
|
|
235
235
|
processed: 处理后音频
|
|
236
236
|
|
|
237
237
|
Returns:
|
|
238
|
-
|
|
238
|
+
谱失真度(dB)
|
|
239
239
|
"""
|
|
240
240
|
# 确保两个信号长度相同
|
|
241
241
|
min_len = min(len(original), len(processed))
|
|
@@ -265,7 +265,7 @@ def comprehensive_quality_assessment(audio: np.ndarray, sr: int = 22050,
|
|
|
265
265
|
Args:
|
|
266
266
|
audio: 待评估音频
|
|
267
267
|
sr: 采样率
|
|
268
|
-
reference:
|
|
268
|
+
reference: 参考音频(可选)
|
|
269
269
|
|
|
270
270
|
Returns:
|
|
271
271
|
质量评估结果字典
|
neverlib/filter/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# neverlib.filter
|
|
2
2
|
|
|
3
|
-
本项目包含音频滤波器的实现和自动EQ匹配算法, 主要基于 scipy.signal 进行封装和扩展, 提供便捷的音频滤波器设计、处理功能以及智能EQ
|
|
3
|
+
本项目包含音频滤波器的实现和自动EQ匹配算法, 主要基于 scipy.signal 进行封装和扩展, 提供便捷的音频滤波器设计、处理功能以及智能EQ补偿解决方案.
|
|
4
4
|
|
|
5
5
|
## 主要功能
|
|
6
6
|
|
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
|
|
19
19
|
### 核心文件说明
|
|
20
20
|
- `filters.py`: 提供 EQFilter 类, 包含多种滤波器的设计和实现
|
|
21
|
-
- `biquad.py`:
|
|
21
|
+
- `biquad.py`: 二阶节(Biquad)滤波器的实现, 支持逐点处理
|
|
22
22
|
- `common.py`: 基础滤波器函数, 提供 numpy/scipy 和 torch 版本
|
|
23
23
|
|
|
24
24
|
### 自动EQ匹配算法 (AudoEQ/)
|
|
@@ -64,7 +64,7 @@ output = [biquad.process(x) for x in input_signal]
|
|
|
64
64
|
对于需要自动EQ匹配的场景, 可以直接运行AudoEQ中的脚本:
|
|
65
65
|
|
|
66
66
|
```bash
|
|
67
|
-
#
|
|
67
|
+
# 快速频谱匹配(推荐入门)
|
|
68
68
|
cd AudoEQ
|
|
69
69
|
python auto_eq_spectral_direct.py
|
|
70
70
|
|
neverlib/filter/__init__.py
CHANGED
|
@@ -1,5 +1,15 @@
|
|
|
1
1
|
# This file is auto-generated. Do NOT edit manually.
|
|
2
2
|
# Generated by neverlib.QA.gen_init
|
|
3
|
+
|
|
4
|
+
from typing import TYPE_CHECKING, Any
|
|
5
|
+
|
|
6
|
+
if TYPE_CHECKING:
|
|
7
|
+
# 仅在类型检查时导入, 提供IDE补全支持
|
|
8
|
+
from .biquad import BiquadFilter
|
|
9
|
+
from .common import HPFilter, HPFilter_torch, LPFilter
|
|
10
|
+
from .core import EQFilter, EQ_test, eq_process, eq_process_test
|
|
11
|
+
|
|
12
|
+
# 运行时使用懒加载
|
|
3
13
|
from lazy_loader import attach
|
|
4
14
|
|
|
5
15
|
__getattr__, __dir__, __all__ = attach(
|
|
@@ -15,3 +25,16 @@ __getattr__, __dir__, __all__ = attach(
|
|
|
15
25
|
"core": ['EQFilter', 'EQ_test', 'eq_process', 'eq_process_test'],
|
|
16
26
|
}
|
|
17
27
|
)
|
|
28
|
+
|
|
29
|
+
# 显式声明 __all__ 以便 IDE 识别
|
|
30
|
+
if TYPE_CHECKING:
|
|
31
|
+
__all__ = [
|
|
32
|
+
'BiquadFilter',
|
|
33
|
+
'HPFilter',
|
|
34
|
+
'HPFilter_torch',
|
|
35
|
+
'LPFilter',
|
|
36
|
+
'EQFilter',
|
|
37
|
+
'EQ_test',
|
|
38
|
+
'eq_process',
|
|
39
|
+
'eq_process_test',
|
|
40
|
+
]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Audio EQ Matching Scripts Collection
|
|
2
2
|
|
|
3
|
-
本文件夹包含多种不同算法的音频EQ匹配脚本, 可以自动分析两个音频文件的频谱差异并生成EQ
|
|
3
|
+
本文件夹包含多种不同算法的音频EQ匹配脚本, 可以自动分析两个音频文件的频谱差异并生成EQ补偿参数.
|
|
4
4
|
|
|
5
5
|
## 📁 脚本概览
|
|
6
6
|
|
|
@@ -162,4 +162,4 @@ SAMPLE_RATE = 16000 # 采样率
|
|
|
162
162
|
|
|
163
163
|
## 📄 许可证
|
|
164
164
|
|
|
165
|
-
|
|
165
|
+
本项目仅供学习和研究使用.
|
|
@@ -1,5 +1,16 @@
|
|
|
1
1
|
# This file is auto-generated. Do NOT edit manually.
|
|
2
2
|
# Generated by neverlib.QA.gen_init
|
|
3
|
+
|
|
4
|
+
from typing import TYPE_CHECKING, Any
|
|
5
|
+
|
|
6
|
+
if TYPE_CHECKING:
|
|
7
|
+
# 仅在类型检查时导入, 提供IDE补全支持
|
|
8
|
+
from .de_eq import get_filter_function, match_frequency_response, plot_spectra_comparison
|
|
9
|
+
from .freq_eq import get_freq_eq
|
|
10
|
+
from .ga_eq_advanced import EQConfig, EQOptimizer, load_config_from_yaml, main
|
|
11
|
+
from .ga_eq_basic import custom_mutate, evaluate_individual, generate_active_gene, generate_dbgain_gene, generate_fc_gene, generate_q_gene, generate_type_gene, get_combined_eq_response_db, get_magnitude_spectrum_db, get_single_filter_freq_response_db_from_coeffs, individual_creator, main_ga
|
|
12
|
+
|
|
13
|
+
# 运行时使用懒加载
|
|
3
14
|
from lazy_loader import attach
|
|
4
15
|
|
|
5
16
|
__getattr__, __dir__, __all__ = attach(
|
|
@@ -17,3 +28,28 @@ __getattr__, __dir__, __all__ = attach(
|
|
|
17
28
|
"ga_eq_basic": ['custom_mutate', 'evaluate_individual', 'generate_active_gene', 'generate_dbgain_gene', 'generate_fc_gene', 'generate_q_gene', 'generate_type_gene', 'get_combined_eq_response_db', 'get_magnitude_spectrum_db', 'get_single_filter_freq_response_db_from_coeffs', 'individual_creator', 'main_ga'],
|
|
18
29
|
}
|
|
19
30
|
)
|
|
31
|
+
|
|
32
|
+
# 显式声明 __all__ 以便 IDE 识别
|
|
33
|
+
if TYPE_CHECKING:
|
|
34
|
+
__all__ = [
|
|
35
|
+
'get_filter_function',
|
|
36
|
+
'match_frequency_response',
|
|
37
|
+
'plot_spectra_comparison',
|
|
38
|
+
'get_freq_eq',
|
|
39
|
+
'EQConfig',
|
|
40
|
+
'EQOptimizer',
|
|
41
|
+
'load_config_from_yaml',
|
|
42
|
+
'main',
|
|
43
|
+
'custom_mutate',
|
|
44
|
+
'evaluate_individual',
|
|
45
|
+
'generate_active_gene',
|
|
46
|
+
'generate_dbgain_gene',
|
|
47
|
+
'generate_fc_gene',
|
|
48
|
+
'generate_q_gene',
|
|
49
|
+
'generate_type_gene',
|
|
50
|
+
'get_combined_eq_response_db',
|
|
51
|
+
'get_magnitude_spectrum_db',
|
|
52
|
+
'get_single_filter_freq_response_db_from_coeffs',
|
|
53
|
+
'individual_creator',
|
|
54
|
+
'main_ga',
|
|
55
|
+
]
|
neverlib/filter/auto_eq/de_eq.py
CHANGED
|
@@ -355,4 +355,4 @@ if __name__ == '__main__':
|
|
|
355
355
|
print(f" 频段 {i + 1}: 类型={params['filter_type']}, Fc={params['fc']:.1f}, Q={params['Q']:.2f}" +
|
|
356
356
|
(f", 增益={params['dBgain']:.2f}" if params['dBgain'] is not None else ""))
|
|
357
357
|
else:
|
|
358
|
-
print("未生成EQ
|
|
358
|
+
print("未生成EQ参数或处理中发生错误. ")
|
|
@@ -119,7 +119,7 @@ class EQOptimizer:
|
|
|
119
119
|
|
|
120
120
|
def _setup_deap(self):
|
|
121
121
|
"""设置DEAP遗传算法框架"""
|
|
122
|
-
#
|
|
122
|
+
# 清除之前的注册(如果有的话)
|
|
123
123
|
if hasattr(creator, "FitnessMin"):
|
|
124
124
|
del creator.FitnessMin
|
|
125
125
|
if hasattr(creator, "Individual"):
|
|
@@ -177,7 +177,7 @@ class EQOptimizer:
|
|
|
177
177
|
return individual,
|
|
178
178
|
|
|
179
179
|
def get_magnitude_spectrum_db(self, audio: np.ndarray, sr: int, n_fft: int) -> Tuple[np.ndarray, np.ndarray]:
|
|
180
|
-
"""
|
|
180
|
+
"""获取音频的幅度谱(dB)"""
|
|
181
181
|
f_spec, t_spec, Sxx_spec = signal.spectrogram(
|
|
182
182
|
audio, fs=sr, nperseg=n_fft, noverlap=n_fft // 4,
|
|
183
183
|
scaling='spectrum', mode='magnitude'
|
|
@@ -204,7 +204,7 @@ def evaluate_individual(individual_chromosome):
|
|
|
204
204
|
|
|
205
205
|
# 调整复杂度惩罚项的计算方式, 使其与误差的量级更相关
|
|
206
206
|
# 例如, 如果误差本身就很大, 那么滤波器的数量惩罚可以相对小一些
|
|
207
|
-
# 或者, 如果目标EQ
|
|
207
|
+
# 或者, 如果目标EQ形状本身就很复杂(变化剧烈), 那么多用几个滤波器也是合理的
|
|
208
208
|
# penalty_scale = 1 + np.mean(np.abs(target_eq_shape_db_global)) # 基于目标EQ形状的平均绝对值
|
|
209
209
|
penalty_scale = np.sum(target_eq_shape_db_global**2) / len(target_eq_shape_db_global) if len(target_eq_shape_db_global) > 0 else 1.0
|
|
210
210
|
if penalty_scale < 1e-3:
|
neverlib/filter/biquad.py
CHANGED
neverlib/metrics/__init__.py
CHANGED
|
@@ -1,5 +1,18 @@
|
|
|
1
1
|
# This file is auto-generated. Do NOT edit manually.
|
|
2
2
|
# Generated by neverlib.QA.gen_init
|
|
3
|
+
|
|
4
|
+
from typing import TYPE_CHECKING, Any
|
|
5
|
+
|
|
6
|
+
if TYPE_CHECKING:
|
|
7
|
+
# 仅在类型检查时导入, 提供IDE补全支持
|
|
8
|
+
from .dnsmos import ComputeScore
|
|
9
|
+
from .lpc_lsp import framing, lpc_lsp_distance, lpc_to_lsp
|
|
10
|
+
from .snr import get_snr, get_snr_from_noisy, psnr, seg_snr, si_sdr
|
|
11
|
+
from .spec import lsd, mcd, sd
|
|
12
|
+
from .test_pesq import mos2pesq, pesq2mos
|
|
13
|
+
from .time import dc_offset, mean_rms_amplitude, peak_amplitude, rms_amplitude
|
|
14
|
+
|
|
15
|
+
# 运行时使用懒加载
|
|
3
16
|
from lazy_loader import attach
|
|
4
17
|
|
|
5
18
|
__getattr__, __dir__, __all__ = attach(
|
|
@@ -21,3 +34,26 @@ __getattr__, __dir__, __all__ = attach(
|
|
|
21
34
|
"time": ['dc_offset', 'mean_rms_amplitude', 'peak_amplitude', 'rms_amplitude'],
|
|
22
35
|
}
|
|
23
36
|
)
|
|
37
|
+
|
|
38
|
+
# 显式声明 __all__ 以便 IDE 识别
|
|
39
|
+
if TYPE_CHECKING:
|
|
40
|
+
__all__ = [
|
|
41
|
+
'ComputeScore',
|
|
42
|
+
'framing',
|
|
43
|
+
'lpc_lsp_distance',
|
|
44
|
+
'lpc_to_lsp',
|
|
45
|
+
'get_snr',
|
|
46
|
+
'get_snr_from_noisy',
|
|
47
|
+
'psnr',
|
|
48
|
+
'seg_snr',
|
|
49
|
+
'si_sdr',
|
|
50
|
+
'lsd',
|
|
51
|
+
'mcd',
|
|
52
|
+
'sd',
|
|
53
|
+
'mos2pesq',
|
|
54
|
+
'pesq2mos',
|
|
55
|
+
'dc_offset',
|
|
56
|
+
'mean_rms_amplitude',
|
|
57
|
+
'peak_amplitude',
|
|
58
|
+
'rms_amplitude',
|
|
59
|
+
]
|
neverlib/metrics/dnsmos.py
CHANGED
|
@@ -2,8 +2,8 @@
|
|
|
2
2
|
Author: 凌逆战 | Never
|
|
3
3
|
Date: 2025-08-06 10:00:00
|
|
4
4
|
Description:
|
|
5
|
-
要计算个性化 MOS
|
|
6
|
-
要计算常规 MOS
|
|
5
|
+
要计算个性化 MOS 分数(干扰说话者受到惩罚), 请提供“-p”参数, 例如:python dnsmos.py -t ./SampleClips -o sample.csv -p
|
|
6
|
+
要计算常规 MOS 分数, 请省略“-p”参数. 例如:python dnsmos.py -t ./SampleClips -o sample.csv
|
|
7
7
|
'''
|
|
8
8
|
import librosa
|
|
9
9
|
import numpy as np
|
neverlib/metrics/lpc_lsp.py
CHANGED
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
主要组件
|
|
8
8
|
|
|
9
9
|
预处理函数:
|
|
10
|
-
- pre_emphasis():
|
|
10
|
+
- pre_emphasis(): 预加重滤波, 增强高频成分
|
|
11
11
|
- framing(): 分帧处理并应用汉明窗
|
|
12
12
|
|
|
13
13
|
LPC分析:
|
|
@@ -16,13 +16,13 @@
|
|
|
16
16
|
|
|
17
17
|
距离计算:
|
|
18
18
|
- lsp_mse(): 计算LSP向量间的均方误差
|
|
19
|
-
- lpc_lsp_distance():
|
|
19
|
+
- lpc_lsp_distance(): 主函数, 返回平均失真度和逐帧失真列表
|
|
20
20
|
|
|
21
21
|
技术特点
|
|
22
22
|
|
|
23
|
-
- 使用soundfile
|
|
24
|
-
- librosa进行LPC
|
|
25
|
-
- 基于LSP
|
|
23
|
+
- 使用soundfile读取音频(支持多种格式)
|
|
24
|
+
- librosa进行LPC分析(替代了自定义算法)
|
|
25
|
+
- 基于LSP的频域失真测量, 对量化误差敏感度更低
|
|
26
26
|
- 逐帧分析捕捉语音时变特性
|
|
27
27
|
|
|
28
28
|
应用场景
|
|
@@ -52,7 +52,7 @@ def framing(signal, frame_size, frame_stride, fs):
|
|
|
52
52
|
|
|
53
53
|
def lpc_to_lsp(a, num_points=512):
|
|
54
54
|
"""
|
|
55
|
-
LPC -> LSP
|
|
55
|
+
LPC -> LSP 转换(简易近似版, 零点搜索法)
|
|
56
56
|
"""
|
|
57
57
|
p = len(a) - 1
|
|
58
58
|
a = np.array(a)
|
|
@@ -91,7 +91,7 @@ def lpc_lsp_distance(ref_wav, test_wav, frame_size=0.025, frame_stride=0.01, ord
|
|
|
91
91
|
ref_frames = framing(ref_sig, frame_size, frame_stride, fs_r)
|
|
92
92
|
test_frames = framing(test_sig, frame_size, frame_stride, fs_t)
|
|
93
93
|
|
|
94
|
-
#
|
|
94
|
+
# 对齐帧数(简单切到最短)
|
|
95
95
|
num_frames = min(len(ref_frames), len(test_frames))
|
|
96
96
|
ref_frames = ref_frames[:num_frames]
|
|
97
97
|
test_frames = test_frames[:num_frames]
|
|
@@ -102,7 +102,7 @@ def lpc_lsp_distance(ref_wav, test_wav, frame_size=0.025, frame_stride=0.01, ord
|
|
|
102
102
|
a_test = librosa.lpc(test_frames[i], order=order)
|
|
103
103
|
lsp_ref = lpc_to_lsp(a_ref)
|
|
104
104
|
lsp_test = lpc_to_lsp(a_test)
|
|
105
|
-
#
|
|
105
|
+
# 对齐长度(简单裁切)
|
|
106
106
|
min_len = min(len(lsp_ref), len(lsp_test))
|
|
107
107
|
# 计算两个 LSP 向量的均方差
|
|
108
108
|
dist = np.mean((lsp_ref[:min_len] - lsp_test[:min_len]) ** 2)
|
neverlib/metrics/snr.py
CHANGED
|
@@ -129,15 +129,15 @@ def psnr(clean, noisy, max_val=None):
|
|
|
129
129
|
|
|
130
130
|
def si_sdr(reference, estimate, epsilon=1e-8):
|
|
131
131
|
"""
|
|
132
|
-
计算尺度不变信噪比 (Scale-Invariant Signal-to-Distortion Ratio, SI-SDR)
|
|
132
|
+
计算尺度不变信号失真比 (Scale-Invariant Signal-to-Distortion Ratio, SI-SDR).
|
|
133
133
|
|
|
134
134
|
Args:
|
|
135
|
-
reference (np.ndarray): 原始的、干净的参考信号 (一维数组)
|
|
136
|
-
estimate (np.ndarray): 模型估计或处理后的信号 (一维数组)
|
|
137
|
-
epsilon (float): 一个非常小的数值, 用于防止分母为零,
|
|
135
|
+
reference (np.ndarray): 原始的、干净的参考信号 (一维数组).
|
|
136
|
+
estimate (np.ndarray): 模型估计或处理后的信号 (一维数组).
|
|
137
|
+
epsilon (float): 一个非常小的数值, 用于防止分母为零, 保证数值稳定性.
|
|
138
138
|
|
|
139
139
|
Returns:
|
|
140
|
-
float: SI-SDR 值, 单位为分贝 (dB)
|
|
140
|
+
float: SI-SDR 值, 单位为分贝 (dB).
|
|
141
141
|
"""
|
|
142
142
|
assert reference.shape == estimate.shape, "reference和estimate的维度不一样"
|
|
143
143
|
|
neverlib/metrics/spec.py
CHANGED
|
@@ -9,7 +9,7 @@ Description: 音频信号频域客观度量指标计算工具
|
|
|
9
9
|
|
|
10
10
|
2. LSD (Log-Spectral Distance) - 对数谱距离
|
|
11
11
|
- 在对数功率谱域计算信号距离
|
|
12
|
-
-
|
|
12
|
+
- 更符合人耳听觉特性, 常用于语音质量评估
|
|
13
13
|
|
|
14
14
|
3. MCD (Mel-Cepstral Distance) - 梅尔倒谱距离
|
|
15
15
|
- 基于MFCC特征的音频相似性度量
|
|
@@ -24,16 +24,16 @@ from neverlib.utils import EPS
|
|
|
24
24
|
|
|
25
25
|
def sd(ref_wav, test_wav, n_fft=2048, hop_length=512, win_length=None):
|
|
26
26
|
"""
|
|
27
|
-
计算两个音频信号之间的频谱距离 (Spectral Distance)
|
|
28
|
-
|
|
27
|
+
计算两个音频信号之间的频谱距离 (Spectral Distance).
|
|
28
|
+
该指标衡量两个信号在频域上的差异程度.
|
|
29
29
|
Args:
|
|
30
30
|
ref_wav (np.ndarray): 参考音频信号 (一维数组)
|
|
31
31
|
test_wav (np.ndarray): 测试音频信号 (一维数组)
|
|
32
|
-
n_fft (int): FFT
|
|
33
|
-
hop_length (int):
|
|
34
|
-
win_length (int, optional):
|
|
32
|
+
n_fft (int): FFT点数, 决定频率分辨率, 默认为2048
|
|
33
|
+
hop_length (int): 帧移, 决定时间分辨率, 默认为512
|
|
34
|
+
win_length (int, optional): 窗长, 如果为None则默认为n_fft
|
|
35
35
|
Returns:
|
|
36
|
-
float:
|
|
36
|
+
float: 频谱距离值, 值越小表示两个信号越相似
|
|
37
37
|
"""
|
|
38
38
|
assert len(ref_wav) == len(test_wav), "输入信号长度必须相同"
|
|
39
39
|
|
|
@@ -58,21 +58,21 @@ def sd(ref_wav, test_wav, n_fft=2048, hop_length=512, win_length=None):
|
|
|
58
58
|
|
|
59
59
|
def lsd(ref_wav, test_wav, n_fft=2048, hop_length=512, win_length=None):
|
|
60
60
|
"""
|
|
61
|
-
计算两个一维音频信号之间的对数谱距离 (Log-Spectral Distance, LSD)
|
|
62
|
-
该实现遵循标准的LSD定义:
|
|
61
|
+
计算两个一维音频信号之间的对数谱距离 (Log-Spectral Distance, LSD).
|
|
62
|
+
该实现遵循标准的LSD定义: 整体均方根误差.
|
|
63
63
|
|
|
64
64
|
Args:
|
|
65
|
-
ref_wav (np.ndarray): 原始的、干净的参考信号 (一维数组)
|
|
66
|
-
test_wav (np.ndarray): 模型估计或处理后的信号 (一维数组)
|
|
67
|
-
n_fft (int): FFT点数,
|
|
68
|
-
hop_length (int): 帧移,
|
|
69
|
-
win_length (int, optional):
|
|
70
|
-
epsilon (float): 一个非常小的数值, 用于防止对零取对数,
|
|
65
|
+
ref_wav (np.ndarray): 原始的、干净的参考信号 (一维数组).
|
|
66
|
+
test_wav (np.ndarray): 模型估计或处理后的信号 (一维数组).
|
|
67
|
+
n_fft (int): FFT点数, 决定了频率分辨率.
|
|
68
|
+
hop_length (int): 帧移, 决定了时间分辨率.
|
|
69
|
+
win_length (int, optional): 窗长. 如果为None, 则默认为n_fft.
|
|
70
|
+
epsilon (float): 一个非常小的数值, 用于防止对零取对数, 保证数值稳定性.
|
|
71
71
|
|
|
72
72
|
Returns:
|
|
73
|
-
float: 对数谱距离值, 单位为分贝 (dB)
|
|
73
|
+
float: 对数谱距离值, 单位为分贝 (dB).
|
|
74
74
|
"""
|
|
75
|
-
assert ref_wav.ndim == 1 and test_wav.ndim == 1, "
|
|
75
|
+
assert ref_wav.ndim == 1 and test_wav.ndim == 1, "输入信号必须是一维数组. "
|
|
76
76
|
|
|
77
77
|
if win_length is None:
|
|
78
78
|
win_length = n_fft
|
|
@@ -100,17 +100,17 @@ def lsd(ref_wav, test_wav, n_fft=2048, hop_length=512, win_length=None):
|
|
|
100
100
|
|
|
101
101
|
def mcd(ref_wav, test_wav, sr=16000, n_mfcc=13):
|
|
102
102
|
"""
|
|
103
|
-
计算两个音频信号之间的梅尔倒谱距离 (Mel-Cepstral Distance, MCD)
|
|
104
|
-
|
|
103
|
+
计算两个音频信号之间的梅尔倒谱距离 (Mel-Cepstral Distance, MCD).
|
|
104
|
+
该指标常用于语音合成质量评估, 值越小表示两个信号越相似.
|
|
105
105
|
|
|
106
106
|
Args:
|
|
107
107
|
ref_wav (np.ndarray): 参考音频信号 (一维数组)
|
|
108
108
|
test_wav (np.ndarray): 测试音频信号 (一维数组)
|
|
109
|
-
sr (int):
|
|
110
|
-
n_mfcc (int): MFCC
|
|
109
|
+
sr (int): 采样率, 默认为16000Hz
|
|
110
|
+
n_mfcc (int): MFCC系数个数, 默认为13
|
|
111
111
|
|
|
112
112
|
Returns:
|
|
113
|
-
float:
|
|
113
|
+
float: 梅尔倒谱距离值, 值越小表示两个信号越相似
|
|
114
114
|
|
|
115
115
|
"""
|
|
116
116
|
assert len(ref_wav) == len(test_wav), "输入信号长度必须相同"
|
|
@@ -119,7 +119,7 @@ def mcd(ref_wav, test_wav, sr=16000, n_mfcc=13):
|
|
|
119
119
|
ref_mfcc = librosa.feature.mfcc(y=ref_wav, sr=sr, n_mfcc=n_mfcc)
|
|
120
120
|
test_mfcc = librosa.feature.mfcc(y=test_wav, sr=sr, n_mfcc=n_mfcc)
|
|
121
121
|
|
|
122
|
-
# 计算MCD (跳过0
|
|
122
|
+
# 计算MCD (跳过0阶系数, 因为0阶主要表示能量)
|
|
123
123
|
diff = ref_mfcc[1:] - test_mfcc[1:]
|
|
124
124
|
mcd_value = (10.0 / np.log(10)) * np.sqrt(
|
|
125
125
|
2 * np.mean(np.sum(diff**2, axis=0)))
|
neverlib/metrics/test_pesq.py
CHANGED
|
@@ -3,7 +3,7 @@ Author: 凌逆战 | Never
|
|
|
3
3
|
Date: 2025-08-05 23:37:31
|
|
4
4
|
Description:
|
|
5
5
|
|
|
6
|
-
PESQ 包含 3 种类型的值:NB PESQ MOS、NB MOS LQO、WB MOS LQO
|
|
6
|
+
PESQ 包含 3 种类型的值: NB PESQ MOS、NB MOS LQO、WB MOS LQO. 此包仅返回 NB PESQ MOS, 即代表窄带手持设备收听 (narrowband handset listening) 场景的原始 (Raw) MOS 分数.
|
|
7
7
|
'''
|
|
8
8
|
import pesq
|
|
9
9
|
import pypesq
|
|
@@ -21,11 +21,11 @@ print(pypesq.pesq(clean, enhance, fs=fs)) # 3.817176103591919
|
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
def pesq2mos(pesq):
|
|
24
|
-
""" 将PESQ值[-0.5, 4.5]映射到MOS-LQO得分[1, 4.5]
|
|
24
|
+
""" 将PESQ值[-0.5, 4.5]映射到MOS-LQO得分[1, 4.5]上, 映射函数来源于:P.862.1 """
|
|
25
25
|
return 0.999 + (4.999 - 0.999) / (1 + np.exp(-1.4945 * pesq + 4.6607))
|
|
26
26
|
|
|
27
27
|
|
|
28
28
|
def mos2pesq(mos):
|
|
29
|
-
""" 将MOS-LQO得分[1, 4.5]映射到PESQ值[-0.5, 4.5]
|
|
29
|
+
""" 将MOS-LQO得分[1, 4.5]映射到PESQ值[-0.5, 4.5]上, 映射函数来源于:P.862.1"""
|
|
30
30
|
inlog = (4.999 - mos) / (mos - 0.999)
|
|
31
31
|
return (4.6607 - np.log(inlog)) / 1.4945
|
neverlib/tests/__init__.py
CHANGED
|
@@ -1,5 +1,14 @@
|
|
|
1
1
|
# This file is auto-generated. Do NOT edit manually.
|
|
2
2
|
# Generated by neverlib.QA.gen_init
|
|
3
|
+
|
|
4
|
+
from typing import TYPE_CHECKING, Any
|
|
5
|
+
|
|
6
|
+
if TYPE_CHECKING:
|
|
7
|
+
# 仅在类型检查时导入, 提供IDE补全支持
|
|
8
|
+
from .test_preprocess import test_NS_shape, test_pre_emphasis
|
|
9
|
+
from .test_vad import test_vad2nad
|
|
10
|
+
|
|
11
|
+
# 运行时使用懒加载
|
|
3
12
|
from lazy_loader import attach
|
|
4
13
|
|
|
5
14
|
__getattr__, __dir__, __all__ = attach(
|
|
@@ -14,3 +23,11 @@ __getattr__, __dir__, __all__ = attach(
|
|
|
14
23
|
"test_vad": ['test_vad2nad'],
|
|
15
24
|
}
|
|
16
25
|
)
|
|
26
|
+
|
|
27
|
+
# 显式声明 __all__ 以便 IDE 识别
|
|
28
|
+
if TYPE_CHECKING:
|
|
29
|
+
__all__ = [
|
|
30
|
+
'test_NS_shape',
|
|
31
|
+
'test_pre_emphasis',
|
|
32
|
+
'test_vad2nad',
|
|
33
|
+
]
|
neverlib/tests/test_imports.py
CHANGED
|
@@ -11,7 +11,7 @@ import time
|
|
|
11
11
|
from neverlib.utils import get_path_list
|
|
12
12
|
from neverlib.data_analyze.dataset_analyzer import AudioFileInfo
|
|
13
13
|
|
|
14
|
-
# 确保当前目录在Python
|
|
14
|
+
# 确保当前目录在Python路径中, 以便导入neverlib
|
|
15
15
|
# sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
16
16
|
sys.path.append("../..")
|
|
17
17
|
print("开始测试neverlib导入功能...")
|
neverlib/utils/README.md
CHANGED
neverlib/utils/__init__.py
CHANGED
|
@@ -1,5 +1,20 @@
|
|
|
1
1
|
# This file is auto-generated. Do NOT edit manually.
|
|
2
2
|
# Generated by neverlib.QA.gen_init
|
|
3
|
+
|
|
4
|
+
from typing import TYPE_CHECKING, Any
|
|
5
|
+
|
|
6
|
+
if TYPE_CHECKING:
|
|
7
|
+
# 仅在类型检查时导入, 提供IDE补全支持
|
|
8
|
+
from .audio_split import audio_split_VADfunasr, audio_split_VADsilero, audio_split_ffmpeg, audio_split_np, audio_split_pydub, audio_split_random, audio_split_sox
|
|
9
|
+
from .checkGPU import get_gpu_utilization, get_gpu_utilization2, monitor_gpu_utilization
|
|
10
|
+
from .floder import change_path, del_empty_folders, get_leaf_folders, rename_files_and_folders
|
|
11
|
+
from .lazy_expose import attach_and_expose_all
|
|
12
|
+
from .lazy_module import LazyModule
|
|
13
|
+
from .message import send_QQEmail, send_QQEmail_with_images
|
|
14
|
+
from .pcm import pcm2wav, read_pcm, wav2pcm
|
|
15
|
+
from .utils import DatasetSubfloderSplit, TrainValSplit, TrainValTestSplit, del_duplicate_path, get_audio_segments, get_file_time, get_path_list, log_time, merge_tensorboard, remove_parent_dir_prefix, save_weight_histogram
|
|
16
|
+
|
|
17
|
+
# 运行时使用懒加载
|
|
3
18
|
from lazy_loader import attach
|
|
4
19
|
|
|
5
20
|
__getattr__, __dir__, __all__ = attach(
|
|
@@ -22,6 +37,43 @@ __getattr__, __dir__, __all__ = attach(
|
|
|
22
37
|
"lazy_module": ['LazyModule'],
|
|
23
38
|
"message": ['send_QQEmail', 'send_QQEmail_with_images'],
|
|
24
39
|
"pcm": ['pcm2wav', 'read_pcm', 'wav2pcm'],
|
|
25
|
-
"utils": ['DatasetSubfloderSplit', 'TrainValSplit', 'TrainValTestSplit', 'get_audio_segments', 'get_file_time', 'get_path_list', 'save_weight_histogram'],
|
|
40
|
+
"utils": ['DatasetSubfloderSplit', 'TrainValSplit', 'TrainValTestSplit', 'del_duplicate_path', 'get_audio_segments', 'get_file_time', 'get_path_list', 'log_time', 'merge_tensorboard', 'remove_parent_dir_prefix', 'save_weight_histogram'],
|
|
26
41
|
}
|
|
27
42
|
)
|
|
43
|
+
|
|
44
|
+
# 显式声明 __all__ 以便 IDE 识别
|
|
45
|
+
if TYPE_CHECKING:
|
|
46
|
+
__all__ = [
|
|
47
|
+
'audio_split_VADfunasr',
|
|
48
|
+
'audio_split_VADsilero',
|
|
49
|
+
'audio_split_ffmpeg',
|
|
50
|
+
'audio_split_np',
|
|
51
|
+
'audio_split_pydub',
|
|
52
|
+
'audio_split_random',
|
|
53
|
+
'audio_split_sox',
|
|
54
|
+
'get_gpu_utilization',
|
|
55
|
+
'get_gpu_utilization2',
|
|
56
|
+
'monitor_gpu_utilization',
|
|
57
|
+
'change_path',
|
|
58
|
+
'del_empty_folders',
|
|
59
|
+
'get_leaf_folders',
|
|
60
|
+
'rename_files_and_folders',
|
|
61
|
+
'attach_and_expose_all',
|
|
62
|
+
'LazyModule',
|
|
63
|
+
'send_QQEmail',
|
|
64
|
+
'send_QQEmail_with_images',
|
|
65
|
+
'pcm2wav',
|
|
66
|
+
'read_pcm',
|
|
67
|
+
'wav2pcm',
|
|
68
|
+
'DatasetSubfloderSplit',
|
|
69
|
+
'TrainValSplit',
|
|
70
|
+
'TrainValTestSplit',
|
|
71
|
+
'del_duplicate_path',
|
|
72
|
+
'get_audio_segments',
|
|
73
|
+
'get_file_time',
|
|
74
|
+
'get_path_list',
|
|
75
|
+
'log_time',
|
|
76
|
+
'merge_tensorboard',
|
|
77
|
+
'remove_parent_dir_prefix',
|
|
78
|
+
'save_weight_histogram',
|
|
79
|
+
]
|
neverlib/utils/audio_split.py
CHANGED
|
@@ -170,7 +170,7 @@ def audio_split_pydub(source_dir, target_dir, sr, channel_num, duration, endwith
|
|
|
170
170
|
|
|
171
171
|
def audio_split_random(source_dir, target_dir, min_duration=3, max_duration=10, sr=16000):
|
|
172
172
|
"""
|
|
173
|
-
将音频切割成 3 到 10
|
|
173
|
+
将音频切割成 3 到 10 秒的多个片段并保存.
|
|
174
174
|
参数:
|
|
175
175
|
- input_audio_path: 输入音频文件路径
|
|
176
176
|
- output_dir: 输出音频文件夹路径
|