neverlib 0.2.6__py3-none-any.whl → 0.2.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- neverlib/.claude/settings.local.json +9 -0
- neverlib/Docs/audio_aug/test_volume.ipynb +416 -0
- neverlib/Docs/audio_aug_test/test_volume.ipynb +289 -0
- neverlib/Docs/filter/biquad.ipynb +129 -0
- neverlib/Docs/filter/filter_family.ipynb +450 -0
- neverlib/Docs/filter/highpass.ipynb +139 -0
- neverlib/Docs/filter/scipy_filter_family.ipynb +110 -0
- neverlib/Docs/vad/VAD_Energy.ipynb +167 -0
- neverlib/Docs/vad/VAD_Silero.ipynb +325 -0
- neverlib/Docs/vad/VAD_WebRTC.ipynb +189 -0
- neverlib/Docs/vad/VAD_funasr.ipynb +192 -0
- neverlib/Docs/vad/VAD_rvADfast.ipynb +162 -0
- neverlib/Docs/vad/VAD_statistics.ipynb +532 -0
- neverlib/Docs/vad/VAD_tenVAD.ipynb +292 -0
- neverlib/Docs/vad/VAD_vadlib.ipynb +168 -0
- neverlib/Docs/vad/VAD_whisper.ipynb +404 -0
- neverlib/QA/gen_init.py +117 -0
- neverlib/QA/get_fun.py +19 -0
- neverlib/__init__.py +21 -4
- neverlib/audio_aug/HarmonicDistortion.py +19 -13
- neverlib/audio_aug/__init__.py +30 -12
- neverlib/audio_aug/audio_aug.py +19 -14
- neverlib/audio_aug/clip_aug.py +15 -18
- neverlib/audio_aug/coder_aug.py +44 -24
- neverlib/audio_aug/coder_aug2.py +54 -37
- neverlib/audio_aug/loss_packet_aug.py +7 -7
- neverlib/audio_aug/quant_aug.py +19 -17
- neverlib/data/000_short_enhance.wav +0 -0
- neverlib/data/3956_speech.wav +0 -0
- neverlib/data/3956_sweep.wav +0 -0
- neverlib/data/vad_example.wav +0 -0
- neverlib/data/white.wav +0 -0
- neverlib/data/white_EQ.wav +0 -0
- neverlib/data/white_matched.wav +0 -0
- neverlib/data_analyze/__init__.py +25 -20
- neverlib/data_analyze/dataset_analyzer.py +109 -114
- neverlib/data_analyze/quality_metrics.py +87 -89
- neverlib/data_analyze/rms_distrubution.py +23 -42
- neverlib/data_analyze/spectral_analysis.py +43 -46
- neverlib/data_analyze/statistics.py +76 -76
- neverlib/data_analyze/temporal_features.py +15 -6
- neverlib/data_analyze/visualization.py +208 -144
- neverlib/filter/__init__.py +17 -20
- neverlib/filter/auto_eq/__init__.py +18 -35
- neverlib/filter/auto_eq/de_eq.py +0 -2
- neverlib/filter/common.py +24 -5
- neverlib/metrics/DNSMOS/bak_ovr.onnx +0 -0
- neverlib/metrics/DNSMOS/model_v8.onnx +0 -0
- neverlib/metrics/DNSMOS/sig.onnx +0 -0
- neverlib/metrics/DNSMOS/sig_bak_ovr.onnx +0 -0
- neverlib/metrics/__init__.py +23 -0
- neverlib/metrics/dnsmos.py +4 -15
- neverlib/metrics/pDNSMOS/sig_bak_ovr.onnx +0 -0
- neverlib/metrics/pesq_c/PESQ +0 -0
- neverlib/metrics/pesq_c/dsp.c +553 -0
- neverlib/metrics/pesq_c/dsp.h +138 -0
- neverlib/metrics/pesq_c/pesq.h +294 -0
- neverlib/metrics/pesq_c/pesqdsp.c +1047 -0
- neverlib/metrics/pesq_c/pesqio.c +392 -0
- neverlib/metrics/pesq_c/pesqmain.c +610 -0
- neverlib/metrics/pesq_c/pesqmod.c +1417 -0
- neverlib/metrics/pesq_c/pesqpar.h +297 -0
- neverlib/metrics/snr.py +5 -1
- neverlib/metrics/spec.py +31 -21
- neverlib/metrics/test_pesq.py +0 -4
- neverlib/tests/test_imports.py +17 -0
- neverlib/utils/__init__.py +26 -15
- neverlib/utils/audio_split.py +5 -1
- neverlib/utils/checkGPU.py +17 -9
- neverlib/utils/lazy_expose.py +29 -0
- neverlib/utils/utils.py +40 -12
- neverlib/vad/__init__.py +33 -25
- neverlib/vad/class_get_speech.py +1 -1
- neverlib/vad/class_vad.py +3 -3
- neverlib/vad/img.png +0 -0
- {neverlib-0.2.6.dist-info → neverlib-0.2.7.dist-info}/METADATA +1 -1
- {neverlib-0.2.6.dist-info → neverlib-0.2.7.dist-info}/RECORD +80 -37
- {neverlib-0.2.6.dist-info → neverlib-0.2.7.dist-info}/WHEEL +0 -0
- {neverlib-0.2.6.dist-info → neverlib-0.2.7.dist-info}/licenses/LICENSE +0 -0
- {neverlib-0.2.6.dist-info → neverlib-0.2.7.dist-info}/top_level.txt +0 -0
neverlib/audio_aug/coder_aug2.py
CHANGED
|
@@ -3,23 +3,23 @@ Author: 凌逆战 | Never
|
|
|
3
3
|
Date: 2025-07-29 17:57:26
|
|
4
4
|
Description:
|
|
5
5
|
'''
|
|
6
|
-
import numpy as np
|
|
7
|
-
import librosa
|
|
8
|
-
import soundfile as sf
|
|
9
|
-
import subprocess
|
|
10
6
|
import os
|
|
11
7
|
import random
|
|
12
|
-
import
|
|
8
|
+
import soundfile as sf
|
|
9
|
+
import subprocess
|
|
13
10
|
|
|
14
11
|
|
|
15
12
|
def check_codec_available(codec):
|
|
16
13
|
"""检查指定的编解码器是否在FFmpeg中可用"""
|
|
17
14
|
try:
|
|
18
|
-
result = subprocess.run(['ffmpeg', '-encoders'],
|
|
15
|
+
result = subprocess.run(['ffmpeg', '-encoders'],
|
|
16
|
+
capture_output=True,
|
|
17
|
+
text=True)
|
|
19
18
|
return codec in result.stdout
|
|
20
19
|
except:
|
|
21
20
|
return False
|
|
22
21
|
|
|
22
|
+
|
|
23
23
|
def apply_codec_distortion(wav, sr, codec='libopus', bitrate='24k'):
|
|
24
24
|
"""
|
|
25
25
|
使用 FFmpeg 对音频应用指定的编解码器和码率, 以模拟有损压缩失真。
|
|
@@ -56,12 +56,15 @@ def apply_codec_distortion(wav, sr, codec='libopus', bitrate='24k'):
|
|
|
56
56
|
sf.write(input_filename, wav, sr)
|
|
57
57
|
|
|
58
58
|
# 2. 构建 FFmpeg 命令
|
|
59
|
-
command = [
|
|
59
|
+
command = [
|
|
60
|
+
'ffmpeg', '-y', '-i', input_filename, '-c:a', codec, '-b:a',
|
|
61
|
+
bitrate
|
|
62
|
+
]
|
|
60
63
|
|
|
61
64
|
# 3. 为 AMR 编解码器添加重采样参数
|
|
62
65
|
if codec in ['amr_nb', 'amr_wb']:
|
|
63
66
|
command.extend(['-ar', '8000']) # AMR-NB 需要 8kHz 采样率
|
|
64
|
-
|
|
67
|
+
|
|
65
68
|
# 4. 为 AAC 指定输出格式 (移除 -f adts, 使用 MP4 容器)
|
|
66
69
|
# if codec == 'aac':
|
|
67
70
|
# command.extend(['-f', 'adts'])
|
|
@@ -69,13 +72,18 @@ def apply_codec_distortion(wav, sr, codec='libopus', bitrate='24k'):
|
|
|
69
72
|
command.append(output_filename)
|
|
70
73
|
|
|
71
74
|
# 执行命令, 并隐藏输出
|
|
72
|
-
subprocess.run(command,
|
|
75
|
+
subprocess.run(command,
|
|
76
|
+
check=True,
|
|
77
|
+
stdout=subprocess.PIPE,
|
|
78
|
+
stderr=subprocess.PIPE)
|
|
73
79
|
|
|
74
80
|
# 4. 将编码后的文件转换回 WAV 格式以便读取
|
|
75
81
|
wav_output = f"temp_final_{codec}_{bitrate}.wav"
|
|
76
|
-
subprocess.run(['ffmpeg', '-y', '-i', output_filename, wav_output],
|
|
77
|
-
|
|
78
|
-
|
|
82
|
+
subprocess.run(['ffmpeg', '-y', '-i', output_filename, wav_output],
|
|
83
|
+
check=True,
|
|
84
|
+
stdout=subprocess.PIPE,
|
|
85
|
+
stderr=subprocess.PIPE)
|
|
86
|
+
|
|
79
87
|
# 5. 从 WAV 文件读回失真的音频
|
|
80
88
|
samples = sf.read(wav_output)[0]
|
|
81
89
|
|
|
@@ -87,32 +95,41 @@ def apply_codec_distortion(wav, sr, codec='libopus', bitrate='24k'):
|
|
|
87
95
|
return wav
|
|
88
96
|
finally:
|
|
89
97
|
# 6. 清理临时文件
|
|
90
|
-
for temp_file in [
|
|
98
|
+
for temp_file in [
|
|
99
|
+
input_filename, output_filename,
|
|
100
|
+
f"temp_final_{codec}_{bitrate}.wav"
|
|
101
|
+
]:
|
|
91
102
|
if os.path.exists(temp_file):
|
|
92
103
|
os.remove(temp_file)
|
|
93
104
|
|
|
94
105
|
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
# AMR-NB
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
106
|
+
if __name__ == "__main__":
|
|
107
|
+
# --- 使用示例 ---
|
|
108
|
+
wav_path = "/data/never/Desktop/kws_train/QA/wav_data/TIMIT.wav"
|
|
109
|
+
wav, wav_sr = sf.read(wav_path, always_2d=True)
|
|
110
|
+
|
|
111
|
+
# 1. 模拟 Opus 编解码器(常用于VoIP, WebRTC)
|
|
112
|
+
print("应用 Opus 编解码器失真...")
|
|
113
|
+
opus_wav = apply_codec_distortion(wav, wav_sr, codec='libopus', bitrate='24k')
|
|
114
|
+
sf.write('augmented_opus.wav', opus_wav, wav_sr)
|
|
115
|
+
|
|
116
|
+
# 2. 模拟 AAC 编解码器(常用于流媒体, Apple设备)
|
|
117
|
+
print("应用 AAC 编解码器失真...")
|
|
118
|
+
aac_wav = apply_codec_distortion(wav, wav_sr, codec='aac', bitrate='64k')
|
|
119
|
+
sf.write('augmented_aac.wav', aac_wav, wav_sr)
|
|
120
|
+
|
|
121
|
+
# 3. 模拟 AMR-NB 编解码器(常用于传统移动通信)
|
|
122
|
+
# AMR-NB 的码率是固定的几个值之一
|
|
123
|
+
amr_bitrates = [
|
|
124
|
+
'4.75k', '5.15k', '5.9k', '6.7k', '7.4k', '7.95k', '10.2k', '12.2k'
|
|
125
|
+
]
|
|
126
|
+
chosen_amr_bitrate = random.choice(amr_bitrates)
|
|
127
|
+
print(f"应用 AMR-NB @ {chosen_amr_bitrate} 编解码器失真...")
|
|
128
|
+
amr_wav = apply_codec_distortion(wav,
|
|
129
|
+
wav_sr,
|
|
130
|
+
codec='amr_nb',
|
|
131
|
+
bitrate=chosen_amr_bitrate)
|
|
132
|
+
# 注意:AMR通常是8kHz采样, librosa加载时会自动重采样, 这里我们保持原始sr
|
|
133
|
+
sf.write('augmented_amr.wav', amr_wav, wav_sr)
|
|
134
|
+
|
|
135
|
+
print("所有编解码器增强完成!")
|
|
@@ -18,7 +18,6 @@ Description: 丢包数据增强
|
|
|
18
18
|
“置零”是在深度学习框架下对“真正丢弃”问题的一种高效、可解的数学建模。 我们牺牲了一点点物理上的真实性, 换来了模型训练的可行性和高效性。
|
|
19
19
|
'''
|
|
20
20
|
import numpy as np
|
|
21
|
-
import random
|
|
22
21
|
import soundfile as sf
|
|
23
22
|
|
|
24
23
|
|
|
@@ -93,11 +92,12 @@ def simulate_packet_loss_vectorized(
|
|
|
93
92
|
return damaged_wav
|
|
94
93
|
|
|
95
94
|
|
|
96
|
-
|
|
97
|
-
|
|
95
|
+
if __name__ == "__main__":
|
|
96
|
+
# 生成一个白噪声
|
|
97
|
+
white_noise = np.random.randn(100000).astype(np.float32)
|
|
98
98
|
|
|
99
|
-
# 生成一个损坏的音频
|
|
100
|
-
damaged_audio = simulate_packet_loss_vectorized(white_noise, 16000, loss_rate=0.1, burst_prob=0.5)
|
|
99
|
+
# 生成一个损坏的音频
|
|
100
|
+
damaged_audio = simulate_packet_loss_vectorized(white_noise, 16000, loss_rate=0.1, burst_prob=0.5)
|
|
101
101
|
|
|
102
|
-
# 保存音频
|
|
103
|
-
sf.write("damaged_audio.wav", damaged_audio, 16000)
|
|
102
|
+
# 保存音频
|
|
103
|
+
sf.write("damaged_audio.wav", damaged_audio, 16000)
|
neverlib/audio_aug/quant_aug.py
CHANGED
|
@@ -3,13 +3,8 @@ Author: 凌逆战 | Never
|
|
|
3
3
|
Date: 2025-03-26 22:13:21
|
|
4
4
|
Description:
|
|
5
5
|
'''
|
|
6
|
-
import os
|
|
7
|
-
import sys
|
|
8
6
|
import numpy as np
|
|
9
7
|
import soundfile as sf
|
|
10
|
-
import librosa
|
|
11
|
-
|
|
12
|
-
print(librosa.__version__)
|
|
13
8
|
|
|
14
9
|
|
|
15
10
|
def apply_uniform_quantization(wav, bit_depth=8):
|
|
@@ -47,6 +42,13 @@ def apply_mulaw_quantization(wav, bit_depth=8):
|
|
|
47
42
|
wav (np.ndarray): 输入音频。
|
|
48
43
|
bit_depth (int): 目标模拟的位深度。
|
|
49
44
|
"""
|
|
45
|
+
try:
|
|
46
|
+
import librosa
|
|
47
|
+
except ImportError:
|
|
48
|
+
raise ImportError(
|
|
49
|
+
"librosa is required for apply_mulaw_quantization(). "
|
|
50
|
+
"Please install it via `pip install librosa`."
|
|
51
|
+
)
|
|
50
52
|
# mu 的值决定了量化级别的数量 (mu + 1)
|
|
51
53
|
mu = 2**bit_depth - 1
|
|
52
54
|
|
|
@@ -61,18 +63,18 @@ def apply_mulaw_quantization(wav, bit_depth=8):
|
|
|
61
63
|
return expanded_wav
|
|
62
64
|
|
|
63
65
|
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
# 模拟一个 8-bit 的老式数字音频设备
|
|
69
|
-
y_quantized_8bit = apply_uniform_quantization(wav, bit_depth=8)
|
|
70
|
-
sf.write('augmented_quantized_8bit.wav', y_quantized_8bit, wav_sr)
|
|
66
|
+
if __name__ == "__main__":
|
|
67
|
+
# --- 使用示例 ---
|
|
68
|
+
wav_path = "/data/never/Desktop/kws_train/QA/wav_data/TIMIT.wav"
|
|
69
|
+
wav, wav_sr = sf.read(wav_path, always_2d=True)
|
|
71
70
|
|
|
72
|
-
#
|
|
73
|
-
|
|
74
|
-
sf.write('
|
|
71
|
+
# 模拟一个 8-bit 的老式数字音频设备
|
|
72
|
+
y_quantized_8bit = apply_uniform_quantization(wav, bit_depth=8)
|
|
73
|
+
sf.write('augmented_quantized_8bit.wav', y_quantized_8bit, wav_sr)
|
|
75
74
|
|
|
75
|
+
# 模拟一个更差的 4-bit 设备
|
|
76
|
+
y_quantized_4bit = apply_uniform_quantization(wav, bit_depth=4)
|
|
77
|
+
sf.write('augmented_quantized_4bit.wav', y_quantized_4bit, wav_sr)
|
|
76
78
|
|
|
77
|
-
y_q = apply_mulaw_quantization(wav, bit_depth=8)
|
|
78
|
-
sf.write('augmented_mulaw_8bit.wav', y_q, wav_sr)
|
|
79
|
+
y_q = apply_mulaw_quantization(wav, bit_depth=8)
|
|
80
|
+
sf.write('augmented_mulaw_8bit.wav', y_q, wav_sr)
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
neverlib/data/white.wav
ADDED
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -1,20 +1,25 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
""
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
"
|
|
19
|
-
|
|
20
|
-
|
|
1
|
+
# This file is auto-generated. Do NOT edit manually.
|
|
2
|
+
# Generated by neverlib.QA.gen_init
|
|
3
|
+
from lazy_loader import attach
|
|
4
|
+
|
|
5
|
+
__getattr__, __dir__, __all__ = attach(
|
|
6
|
+
__name__,
|
|
7
|
+
submodules=[
|
|
8
|
+
"dataset_analyzer",
|
|
9
|
+
"quality_metrics",
|
|
10
|
+
"rms_distrubution",
|
|
11
|
+
"spectral_analysis",
|
|
12
|
+
"statistics",
|
|
13
|
+
"temporal_features",
|
|
14
|
+
"visualization",
|
|
15
|
+
],
|
|
16
|
+
submod_attrs={
|
|
17
|
+
"dataset_analyzer": ['AudioFileInfo', 'DatasetAnalyzer', 'analyze_audio_dataset'],
|
|
18
|
+
"quality_metrics": ['QualityAnalyzer', 'audio_health_check', 'comprehensive_quality_assessment'],
|
|
19
|
+
"rms_distrubution": ['get_rms_vad'],
|
|
20
|
+
"spectral_analysis": ['SpectralAnalyzer', 'compute_spectral_features', 'frequency_domain_stats'],
|
|
21
|
+
"statistics": ['AudioStatistics', 'compare_datasets', 'quick_audio_stats'],
|
|
22
|
+
"temporal_features": ['dB', 'dc_offset', 'max_rms_amplitude', 'mean_rms_amplitude', 'min_rms_amplitude', 'peak_amplitude', 'rms_amplitude', 'short_time_energy', 'zero_crossing_rate'],
|
|
23
|
+
"visualization": ['AudioVisualizer', 'create_analysis_dashboard', 'plot_dataset_overview'],
|
|
24
|
+
}
|
|
25
|
+
)
|