neverlib 0.2.6__py3-none-any.whl → 0.2.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- neverlib/.claude/settings.local.json +9 -0
- neverlib/Docs/audio_aug/test_volume.ipynb +416 -0
- neverlib/Docs/audio_aug_test/test_volume.ipynb +289 -0
- neverlib/Docs/filter/biquad.ipynb +129 -0
- neverlib/Docs/filter/filter_family.ipynb +450 -0
- neverlib/Docs/filter/highpass.ipynb +139 -0
- neverlib/Docs/filter/scipy_filter_family.ipynb +110 -0
- neverlib/Docs/vad/VAD_Energy.ipynb +167 -0
- neverlib/Docs/vad/VAD_Silero.ipynb +325 -0
- neverlib/Docs/vad/VAD_WebRTC.ipynb +189 -0
- neverlib/Docs/vad/VAD_funasr.ipynb +192 -0
- neverlib/Docs/vad/VAD_rvADfast.ipynb +162 -0
- neverlib/Docs/vad/VAD_statistics.ipynb +532 -0
- neverlib/Docs/vad/VAD_tenVAD.ipynb +292 -0
- neverlib/Docs/vad/VAD_vadlib.ipynb +168 -0
- neverlib/Docs/vad/VAD_whisper.ipynb +404 -0
- neverlib/QA/gen_init.py +218 -0
- neverlib/QA/get_fun.py +19 -0
- neverlib/__init__.py +40 -4
- neverlib/audio_aug/HarmonicDistortion.py +19 -13
- neverlib/audio_aug/__init__.py +82 -12
- neverlib/audio_aug/audio_aug.py +19 -14
- neverlib/audio_aug/clip_aug.py +15 -18
- neverlib/audio_aug/coder_aug.py +44 -24
- neverlib/audio_aug/coder_aug2.py +54 -37
- neverlib/audio_aug/loss_packet_aug.py +7 -7
- neverlib/audio_aug/quant_aug.py +19 -17
- neverlib/data/000_short_enhance.wav +0 -0
- neverlib/data/3956_speech.wav +0 -0
- neverlib/data/3956_sweep.wav +0 -0
- neverlib/data/vad_example.wav +0 -0
- neverlib/data/white.wav +0 -0
- neverlib/data/white_EQ.wav +0 -0
- neverlib/data/white_matched.wav +0 -0
- neverlib/data_analyze/__init__.py +69 -20
- neverlib/data_analyze/dataset_analyzer.py +109 -114
- neverlib/data_analyze/quality_metrics.py +87 -89
- neverlib/data_analyze/rms_distrubution.py +23 -42
- neverlib/data_analyze/spectral_analysis.py +43 -46
- neverlib/data_analyze/statistics.py +76 -76
- neverlib/data_analyze/temporal_features.py +15 -6
- neverlib/data_analyze/visualization.py +208 -144
- neverlib/filter/__init__.py +40 -20
- neverlib/filter/auto_eq/__init__.py +50 -31
- neverlib/filter/auto_eq/de_eq.py +0 -2
- neverlib/filter/common.py +24 -5
- neverlib/metrics/DNSMOS/bak_ovr.onnx +0 -0
- neverlib/metrics/DNSMOS/model_v8.onnx +0 -0
- neverlib/metrics/DNSMOS/sig.onnx +0 -0
- neverlib/metrics/DNSMOS/sig_bak_ovr.onnx +0 -0
- neverlib/metrics/__init__.py +59 -0
- neverlib/metrics/dnsmos.py +4 -15
- neverlib/metrics/pDNSMOS/sig_bak_ovr.onnx +0 -0
- neverlib/metrics/pesq_c/PESQ +0 -0
- neverlib/metrics/pesq_c/dsp.c +553 -0
- neverlib/metrics/pesq_c/dsp.h +138 -0
- neverlib/metrics/pesq_c/pesq.h +294 -0
- neverlib/metrics/pesq_c/pesqdsp.c +1047 -0
- neverlib/metrics/pesq_c/pesqio.c +392 -0
- neverlib/metrics/pesq_c/pesqmain.c +610 -0
- neverlib/metrics/pesq_c/pesqmod.c +1417 -0
- neverlib/metrics/pesq_c/pesqpar.h +297 -0
- neverlib/metrics/snr.py +5 -1
- neverlib/metrics/spec.py +31 -21
- neverlib/metrics/test_pesq.py +0 -4
- neverlib/tests/__init__.py +33 -1
- neverlib/tests/test_imports.py +19 -0
- neverlib/utils/__init__.py +71 -15
- neverlib/utils/audio_split.py +6 -1
- neverlib/utils/checkGPU.py +17 -9
- neverlib/utils/lazy_expose.py +29 -0
- neverlib/utils/utils.py +55 -12
- neverlib/vad/PreProcess.py +66 -66
- neverlib/vad/__init__.py +71 -25
- neverlib/vad/class_get_speech.py +1 -1
- neverlib/vad/class_vad.py +3 -3
- neverlib/vad/img.png +0 -0
- {neverlib-0.2.6.dist-info → neverlib-0.2.8.dist-info}/METADATA +1 -1
- {neverlib-0.2.6.dist-info → neverlib-0.2.8.dist-info}/RECORD +82 -39
- {neverlib-0.2.6.dist-info → neverlib-0.2.8.dist-info}/WHEEL +0 -0
- {neverlib-0.2.6.dist-info → neverlib-0.2.8.dist-info}/licenses/LICENSE +0 -0
- {neverlib-0.2.6.dist-info → neverlib-0.2.8.dist-info}/top_level.txt +0 -0
neverlib/audio_aug/coder_aug.py
CHANGED
|
@@ -20,16 +20,20 @@ AMR (Adaptive Multi-Rate)
|
|
|
20
20
|
- 压缩特性:严格为语音设计, 会滤除大部分非语音频率(如音乐), 导致音乐听起来“电话音”效果。
|
|
21
21
|
- 数据增强目的:固定采样率:AMR-NB (窄带) 为 8kHz, AMR-WB (宽带) 为 16kHz。这一点至关重要!
|
|
22
22
|
"""
|
|
23
|
+
import random
|
|
23
24
|
import numpy as np
|
|
24
25
|
import soundfile as sf
|
|
25
|
-
from audiomentations import Mp3Compression
|
|
26
|
-
import av
|
|
27
|
-
import random
|
|
28
|
-
|
|
29
|
-
# mp3编解码数据增强
|
|
30
26
|
|
|
31
27
|
|
|
32
28
|
def mp3_aug(wav, sr):
|
|
29
|
+
# mp3编解码数据增强
|
|
30
|
+
try:
|
|
31
|
+
from audiomentations import Mp3Compression
|
|
32
|
+
except ImportError:
|
|
33
|
+
raise ImportError(
|
|
34
|
+
"audiomentations is required for mp3_aug(). "
|
|
35
|
+
"Please install it via `pip install audiomentations`.")
|
|
36
|
+
|
|
33
37
|
# return Mp3Compression(min_bitrate=64, max_bitrate=192, p=1.0)(samples, sample_rate)
|
|
34
38
|
return sf.write('audio.mp3', wav, sr, format='MP3', bitrate='192k')
|
|
35
39
|
|
|
@@ -47,11 +51,20 @@ def opus_aug_save(wav: np.ndarray, sr: int, output_filepath: str):
|
|
|
47
51
|
对音频进行 Opus 压缩, 并直接保存到文件。
|
|
48
52
|
使用 PyAV 实现, 比特率是随机的。
|
|
49
53
|
"""
|
|
54
|
+
try:
|
|
55
|
+
import av
|
|
56
|
+
except ImportError:
|
|
57
|
+
raise ImportError("av is required for opus_aug_save(). "
|
|
58
|
+
"Please install it via `pip install av`.")
|
|
59
|
+
|
|
50
60
|
# 随机选择一个比特率 (kbps)
|
|
51
61
|
bitrate_kbps = random.choice([24, 32, 48, 64, 96, 128])
|
|
52
|
-
output_filepath_with_bitrate = output_filepath.replace(
|
|
62
|
+
output_filepath_with_bitrate = output_filepath.replace(
|
|
63
|
+
'.opus', f'_{bitrate_kbps}k.opus')
|
|
53
64
|
|
|
54
|
-
print(
|
|
65
|
+
print(
|
|
66
|
+
f" -> Saving Opus augmented version to: {output_filepath_with_bitrate} (Bitrate: {bitrate_kbps}k)"
|
|
67
|
+
)
|
|
55
68
|
|
|
56
69
|
# PyAV 需要 (n_channels, n_samples) 格式
|
|
57
70
|
wav_ch_first = wav.T if wav.ndim > 1 else wav.reshape(1, -1)
|
|
@@ -81,19 +94,28 @@ def aac_aug_save(wav: np.ndarray, sr: int, output_filepath: str):
|
|
|
81
94
|
对音频进行 AAC 压缩, 并直接保存到文件。
|
|
82
95
|
使用 PyAV 实现, 比特率是随机的。
|
|
83
96
|
"""
|
|
97
|
+
try:
|
|
98
|
+
import av
|
|
99
|
+
except ImportError:
|
|
100
|
+
raise ImportError("av is required for aac_aug_save(). "
|
|
101
|
+
"Please install it via `pip install av`.")
|
|
84
102
|
# 随机选择一个比特率 (kbps)
|
|
85
103
|
bitrate_kbps = random.choice([48, 64, 96, 128, 160, 192])
|
|
86
104
|
# .m4a 是 AAC 更常用的文件后缀
|
|
87
|
-
output_filepath_with_bitrate = output_filepath.replace(
|
|
105
|
+
output_filepath_with_bitrate = output_filepath.replace(
|
|
106
|
+
'.aac', f'_{bitrate_kbps}k.m4a')
|
|
88
107
|
|
|
89
|
-
print(
|
|
108
|
+
print(
|
|
109
|
+
f" -> Saving AAC augmented version to: {output_filepath_with_bitrate} (Bitrate: {bitrate_kbps}k)"
|
|
110
|
+
)
|
|
90
111
|
|
|
91
112
|
# PyAV 需要 (n_channels, n_samples) 格式
|
|
92
113
|
wav_ch_first = wav.T if wav.ndim > 1 else wav.reshape(1, -1)
|
|
93
114
|
layout = 'stereo' if wav.ndim > 1 else 'mono'
|
|
94
115
|
|
|
95
116
|
# 注意:format='adts' 是原始 AAC 流, 'mp4' 会创建 .m4a/.mp4 容器
|
|
96
|
-
with av.open(output_filepath_with_bitrate, mode='w',
|
|
117
|
+
with av.open(output_filepath_with_bitrate, mode='w',
|
|
118
|
+
format='mp4') as container:
|
|
97
119
|
# 使用高质量的 fdk_aac 编码器
|
|
98
120
|
stream = container.add_stream('libfdk_aac', rate=sr, layout=layout)
|
|
99
121
|
stream.bit_rate = bitrate_kbps * 1000
|
|
@@ -111,13 +133,11 @@ def aac_aug_save(wav: np.ndarray, sr: int, output_filepath: str):
|
|
|
111
133
|
print(f" ... success.")
|
|
112
134
|
|
|
113
135
|
|
|
114
|
-
def flac_encode_save(
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
bits_per_sample=None
|
|
120
|
-
):
|
|
136
|
+
def flac_encode_save(wav: np.ndarray,
|
|
137
|
+
sr: int,
|
|
138
|
+
output_filepath: str,
|
|
139
|
+
compression_level: int = 5,
|
|
140
|
+
bits_per_sample=None):
|
|
121
141
|
"""
|
|
122
142
|
使用 pyFLAC 将 NumPy 音频数组编码为 FLAC 文件并保存。
|
|
123
143
|
|
|
@@ -164,14 +184,14 @@ def flac_encode_save(
|
|
|
164
184
|
else:
|
|
165
185
|
wav_int = wav
|
|
166
186
|
else:
|
|
167
|
-
raise ValueError(
|
|
187
|
+
raise ValueError(
|
|
188
|
+
f"Unsupported bits_per_sample: {bits_per_sample}. Must be 16, 24, or 32."
|
|
189
|
+
)
|
|
168
190
|
|
|
169
191
|
# --- 2. 初始化编码器 ---
|
|
170
|
-
encoder = Encoder(
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
compression_level=compression_level
|
|
174
|
-
)
|
|
192
|
+
encoder = Encoder(sample_rate=sr,
|
|
193
|
+
bits_per_sample=bits_per_sample,
|
|
194
|
+
compression_level=compression_level)
|
|
175
195
|
|
|
176
196
|
# --- 3. 处理数据并获取编码后的字节 ---
|
|
177
197
|
# Encoder.process() 可以分块处理, 但对于中等长度的音频, 一次性处理更简单
|
|
@@ -196,8 +216,8 @@ def amr_wb_aug(wav, sr):
|
|
|
196
216
|
# return ApplyCodec(encoder="libamr_wb", p=1.0)(samples, sample_rate)
|
|
197
217
|
return sf.write('audio.amr', wav, sr, format='AMR', bitrate='192k')
|
|
198
218
|
|
|
199
|
-
# Opus 编解码数据增强
|
|
200
219
|
|
|
220
|
+
# Opus 编解码数据增强
|
|
201
221
|
|
|
202
222
|
if __name__ == "__main__":
|
|
203
223
|
wav_path = "/data/never/Desktop/kws_train/QA/wav_data/TIMIT.wav"
|
neverlib/audio_aug/coder_aug2.py
CHANGED
|
@@ -3,23 +3,23 @@ Author: 凌逆战 | Never
|
|
|
3
3
|
Date: 2025-07-29 17:57:26
|
|
4
4
|
Description:
|
|
5
5
|
'''
|
|
6
|
-
import numpy as np
|
|
7
|
-
import librosa
|
|
8
|
-
import soundfile as sf
|
|
9
|
-
import subprocess
|
|
10
6
|
import os
|
|
11
7
|
import random
|
|
12
|
-
import
|
|
8
|
+
import soundfile as sf
|
|
9
|
+
import subprocess
|
|
13
10
|
|
|
14
11
|
|
|
15
12
|
def check_codec_available(codec):
|
|
16
13
|
"""检查指定的编解码器是否在FFmpeg中可用"""
|
|
17
14
|
try:
|
|
18
|
-
result = subprocess.run(['ffmpeg', '-encoders'],
|
|
15
|
+
result = subprocess.run(['ffmpeg', '-encoders'],
|
|
16
|
+
capture_output=True,
|
|
17
|
+
text=True)
|
|
19
18
|
return codec in result.stdout
|
|
20
19
|
except:
|
|
21
20
|
return False
|
|
22
21
|
|
|
22
|
+
|
|
23
23
|
def apply_codec_distortion(wav, sr, codec='libopus', bitrate='24k'):
|
|
24
24
|
"""
|
|
25
25
|
使用 FFmpeg 对音频应用指定的编解码器和码率, 以模拟有损压缩失真。
|
|
@@ -56,12 +56,15 @@ def apply_codec_distortion(wav, sr, codec='libopus', bitrate='24k'):
|
|
|
56
56
|
sf.write(input_filename, wav, sr)
|
|
57
57
|
|
|
58
58
|
# 2. 构建 FFmpeg 命令
|
|
59
|
-
command = [
|
|
59
|
+
command = [
|
|
60
|
+
'ffmpeg', '-y', '-i', input_filename, '-c:a', codec, '-b:a',
|
|
61
|
+
bitrate
|
|
62
|
+
]
|
|
60
63
|
|
|
61
64
|
# 3. 为 AMR 编解码器添加重采样参数
|
|
62
65
|
if codec in ['amr_nb', 'amr_wb']:
|
|
63
66
|
command.extend(['-ar', '8000']) # AMR-NB 需要 8kHz 采样率
|
|
64
|
-
|
|
67
|
+
|
|
65
68
|
# 4. 为 AAC 指定输出格式 (移除 -f adts, 使用 MP4 容器)
|
|
66
69
|
# if codec == 'aac':
|
|
67
70
|
# command.extend(['-f', 'adts'])
|
|
@@ -69,13 +72,18 @@ def apply_codec_distortion(wav, sr, codec='libopus', bitrate='24k'):
|
|
|
69
72
|
command.append(output_filename)
|
|
70
73
|
|
|
71
74
|
# 执行命令, 并隐藏输出
|
|
72
|
-
subprocess.run(command,
|
|
75
|
+
subprocess.run(command,
|
|
76
|
+
check=True,
|
|
77
|
+
stdout=subprocess.PIPE,
|
|
78
|
+
stderr=subprocess.PIPE)
|
|
73
79
|
|
|
74
80
|
# 4. 将编码后的文件转换回 WAV 格式以便读取
|
|
75
81
|
wav_output = f"temp_final_{codec}_{bitrate}.wav"
|
|
76
|
-
subprocess.run(['ffmpeg', '-y', '-i', output_filename, wav_output],
|
|
77
|
-
|
|
78
|
-
|
|
82
|
+
subprocess.run(['ffmpeg', '-y', '-i', output_filename, wav_output],
|
|
83
|
+
check=True,
|
|
84
|
+
stdout=subprocess.PIPE,
|
|
85
|
+
stderr=subprocess.PIPE)
|
|
86
|
+
|
|
79
87
|
# 5. 从 WAV 文件读回失真的音频
|
|
80
88
|
samples = sf.read(wav_output)[0]
|
|
81
89
|
|
|
@@ -87,32 +95,41 @@ def apply_codec_distortion(wav, sr, codec='libopus', bitrate='24k'):
|
|
|
87
95
|
return wav
|
|
88
96
|
finally:
|
|
89
97
|
# 6. 清理临时文件
|
|
90
|
-
for temp_file in [
|
|
98
|
+
for temp_file in [
|
|
99
|
+
input_filename, output_filename,
|
|
100
|
+
f"temp_final_{codec}_{bitrate}.wav"
|
|
101
|
+
]:
|
|
91
102
|
if os.path.exists(temp_file):
|
|
92
103
|
os.remove(temp_file)
|
|
93
104
|
|
|
94
105
|
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
# AMR-NB
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
106
|
+
if __name__ == "__main__":
|
|
107
|
+
# --- 使用示例 ---
|
|
108
|
+
wav_path = "/data/never/Desktop/kws_train/QA/wav_data/TIMIT.wav"
|
|
109
|
+
wav, wav_sr = sf.read(wav_path, always_2d=True)
|
|
110
|
+
|
|
111
|
+
# 1. 模拟 Opus 编解码器(常用于VoIP, WebRTC)
|
|
112
|
+
print("应用 Opus 编解码器失真...")
|
|
113
|
+
opus_wav = apply_codec_distortion(wav, wav_sr, codec='libopus', bitrate='24k')
|
|
114
|
+
sf.write('augmented_opus.wav', opus_wav, wav_sr)
|
|
115
|
+
|
|
116
|
+
# 2. 模拟 AAC 编解码器(常用于流媒体, Apple设备)
|
|
117
|
+
print("应用 AAC 编解码器失真...")
|
|
118
|
+
aac_wav = apply_codec_distortion(wav, wav_sr, codec='aac', bitrate='64k')
|
|
119
|
+
sf.write('augmented_aac.wav', aac_wav, wav_sr)
|
|
120
|
+
|
|
121
|
+
# 3. 模拟 AMR-NB 编解码器(常用于传统移动通信)
|
|
122
|
+
# AMR-NB 的码率是固定的几个值之一
|
|
123
|
+
amr_bitrates = [
|
|
124
|
+
'4.75k', '5.15k', '5.9k', '6.7k', '7.4k', '7.95k', '10.2k', '12.2k'
|
|
125
|
+
]
|
|
126
|
+
chosen_amr_bitrate = random.choice(amr_bitrates)
|
|
127
|
+
print(f"应用 AMR-NB @ {chosen_amr_bitrate} 编解码器失真...")
|
|
128
|
+
amr_wav = apply_codec_distortion(wav,
|
|
129
|
+
wav_sr,
|
|
130
|
+
codec='amr_nb',
|
|
131
|
+
bitrate=chosen_amr_bitrate)
|
|
132
|
+
# 注意:AMR通常是8kHz采样, librosa加载时会自动重采样, 这里我们保持原始sr
|
|
133
|
+
sf.write('augmented_amr.wav', amr_wav, wav_sr)
|
|
134
|
+
|
|
135
|
+
print("所有编解码器增强完成!")
|
neverlib/audio_aug/loss_packet_aug.py
CHANGED
|
@@ -18,7 +18,6 @@ Description: 丢包数据增强
|
|
|
18
18
|
“置零”是在深度学习框架下对“真正丢弃”问题的一种高效、可解的数学建模。 我们牺牲了一点点物理上的真实性, 换来了模型训练的可行性和高效性。
|
|
19
19
|
'''
|
|
20
20
|
import numpy as np
|
|
21
|
-
import random
|
|
22
21
|
import soundfile as sf
|
|
23
22
|
|
|
24
23
|
|
|
@@ -93,11 +92,12 @@ def simulate_packet_loss_vectorized(
|
|
|
93
92
|
return damaged_wav
|
|
94
93
|
|
|
95
94
|
|
|
96
|
-
|
|
97
|
-
|
|
95
|
+
if __name__ == "__main__":
|
|
96
|
+
# 生成一个白噪声
|
|
97
|
+
white_noise = np.random.randn(100000).astype(np.float32)
|
|
98
98
|
|
|
99
|
-
# 生成一个损坏的音频
|
|
100
|
-
damaged_audio = simulate_packet_loss_vectorized(white_noise, 16000, loss_rate=0.1, burst_prob=0.5)
|
|
99
|
+
# 生成一个损坏的音频
|
|
100
|
+
damaged_audio = simulate_packet_loss_vectorized(white_noise, 16000, loss_rate=0.1, burst_prob=0.5)
|
|
101
101
|
|
|
102
|
-
# 保存音频
|
|
103
|
-
sf.write("damaged_audio.wav", damaged_audio, 16000)
|
|
102
|
+
# 保存音频
|
|
103
|
+
sf.write("damaged_audio.wav", damaged_audio, 16000)
|
neverlib/audio_aug/quant_aug.py
CHANGED
|
@@ -3,13 +3,8 @@ Author: 凌逆战 | Never
|
|
|
3
3
|
Date: 2025-03-26 22:13:21
|
|
4
4
|
Description:
|
|
5
5
|
'''
|
|
6
|
-
import os
|
|
7
|
-
import sys
|
|
8
6
|
import numpy as np
|
|
9
7
|
import soundfile as sf
|
|
10
|
-
import librosa
|
|
11
|
-
|
|
12
|
-
print(librosa.__version__)
|
|
13
8
|
|
|
14
9
|
|
|
15
10
|
def apply_uniform_quantization(wav, bit_depth=8):
|
|
@@ -47,6 +42,13 @@ def apply_mulaw_quantization(wav, bit_depth=8):
|
|
|
47
42
|
wav (np.ndarray): 输入音频。
|
|
48
43
|
bit_depth (int): 目标模拟的位深度。
|
|
49
44
|
"""
|
|
45
|
+
try:
|
|
46
|
+
import librosa
|
|
47
|
+
except ImportError:
|
|
48
|
+
raise ImportError(
|
|
49
|
+
"librosa is required for apply_mulaw_quantization(). "
|
|
50
|
+
"Please install it via `pip install librosa`."
|
|
51
|
+
)
|
|
50
52
|
# mu 的值决定了量化级别的数量 (mu + 1)
|
|
51
53
|
mu = 2**bit_depth - 1
|
|
52
54
|
|
|
@@ -61,18 +63,18 @@ def apply_mulaw_quantization(wav, bit_depth=8):
|
|
|
61
63
|
return expanded_wav
|
|
62
64
|
|
|
63
65
|
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
# 模拟一个 8-bit 的老式数字音频设备
|
|
69
|
-
y_quantized_8bit = apply_uniform_quantization(wav, bit_depth=8)
|
|
70
|
-
sf.write('augmented_quantized_8bit.wav', y_quantized_8bit, wav_sr)
|
|
66
|
+
if __name__ == "__main__":
|
|
67
|
+
# --- 使用示例 ---
|
|
68
|
+
wav_path = "/data/never/Desktop/kws_train/QA/wav_data/TIMIT.wav"
|
|
69
|
+
wav, wav_sr = sf.read(wav_path, always_2d=True)
|
|
71
70
|
|
|
72
|
-
#
|
|
73
|
-
|
|
74
|
-
sf.write('
|
|
71
|
+
# 模拟一个 8-bit 的老式数字音频设备
|
|
72
|
+
y_quantized_8bit = apply_uniform_quantization(wav, bit_depth=8)
|
|
73
|
+
sf.write('augmented_quantized_8bit.wav', y_quantized_8bit, wav_sr)
|
|
75
74
|
|
|
75
|
+
# 模拟一个更差的 4-bit 设备
|
|
76
|
+
y_quantized_4bit = apply_uniform_quantization(wav, bit_depth=4)
|
|
77
|
+
sf.write('augmented_quantized_4bit.wav', y_quantized_4bit, wav_sr)
|
|
76
78
|
|
|
77
|
-
y_q = apply_mulaw_quantization(wav, bit_depth=8)
|
|
78
|
-
sf.write('augmented_mulaw_8bit.wav', y_q, wav_sr)
|
|
79
|
+
y_q = apply_mulaw_quantization(wav, bit_depth=8)
|
|
80
|
+
sf.write('augmented_mulaw_8bit.wav', y_q, wav_sr)
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
neverlib/data/white.wav
ADDED
|
Binary file
|
|
Binary file
|
|
Binary file
|
neverlib/data_analyze/__init__.py
CHANGED
|
@@ -1,20 +1,69 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
from
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
1
|
+
# This file is auto-generated. Do NOT edit manually.
|
|
2
|
+
# Generated by neverlib.QA.gen_init
|
|
3
|
+
|
|
4
|
+
from typing import TYPE_CHECKING, Any
|
|
5
|
+
|
|
6
|
+
if TYPE_CHECKING:
|
|
7
|
+
# 仅在类型检查时导入,提供IDE补全支持
|
|
8
|
+
from .dataset_analyzer import AudioFileInfo, DatasetAnalyzer, analyze_audio_dataset
|
|
9
|
+
from .quality_metrics import QualityAnalyzer, audio_health_check, comprehensive_quality_assessment
|
|
10
|
+
from .rms_distrubution import get_rms_vad
|
|
11
|
+
from .spectral_analysis import SpectralAnalyzer, compute_spectral_features, frequency_domain_stats
|
|
12
|
+
from .statistics import AudioStatistics, compare_datasets, quick_audio_stats
|
|
13
|
+
from .temporal_features import dB, dc_offset, max_rms_amplitude, mean_rms_amplitude, min_rms_amplitude, peak_amplitude, rms_amplitude, short_time_energy, zero_crossing_rate
|
|
14
|
+
from .visualization import AudioVisualizer, create_analysis_dashboard, plot_dataset_overview
|
|
15
|
+
|
|
16
|
+
# 运行时使用懒加载
|
|
17
|
+
from lazy_loader import attach
|
|
18
|
+
|
|
19
|
+
__getattr__, __dir__, __all__ = attach(
|
|
20
|
+
__name__,
|
|
21
|
+
submodules=[
|
|
22
|
+
"dataset_analyzer",
|
|
23
|
+
"quality_metrics",
|
|
24
|
+
"rms_distrubution",
|
|
25
|
+
"spectral_analysis",
|
|
26
|
+
"statistics",
|
|
27
|
+
"temporal_features",
|
|
28
|
+
"visualization",
|
|
29
|
+
],
|
|
30
|
+
submod_attrs={
|
|
31
|
+
"dataset_analyzer": ['AudioFileInfo', 'DatasetAnalyzer', 'analyze_audio_dataset'],
|
|
32
|
+
"quality_metrics": ['QualityAnalyzer', 'audio_health_check', 'comprehensive_quality_assessment'],
|
|
33
|
+
"rms_distrubution": ['get_rms_vad'],
|
|
34
|
+
"spectral_analysis": ['SpectralAnalyzer', 'compute_spectral_features', 'frequency_domain_stats'],
|
|
35
|
+
"statistics": ['AudioStatistics', 'compare_datasets', 'quick_audio_stats'],
|
|
36
|
+
"temporal_features": ['dB', 'dc_offset', 'max_rms_amplitude', 'mean_rms_amplitude', 'min_rms_amplitude', 'peak_amplitude', 'rms_amplitude', 'short_time_energy', 'zero_crossing_rate'],
|
|
37
|
+
"visualization": ['AudioVisualizer', 'create_analysis_dashboard', 'plot_dataset_overview'],
|
|
38
|
+
}
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
# 显式声明 __all__ 以便 IDE 识别
|
|
42
|
+
if TYPE_CHECKING:
|
|
43
|
+
__all__ = [
|
|
44
|
+
'AudioFileInfo',
|
|
45
|
+
'DatasetAnalyzer',
|
|
46
|
+
'analyze_audio_dataset',
|
|
47
|
+
'QualityAnalyzer',
|
|
48
|
+
'audio_health_check',
|
|
49
|
+
'comprehensive_quality_assessment',
|
|
50
|
+
'get_rms_vad',
|
|
51
|
+
'SpectralAnalyzer',
|
|
52
|
+
'compute_spectral_features',
|
|
53
|
+
'frequency_domain_stats',
|
|
54
|
+
'AudioStatistics',
|
|
55
|
+
'compare_datasets',
|
|
56
|
+
'quick_audio_stats',
|
|
57
|
+
'dB',
|
|
58
|
+
'dc_offset',
|
|
59
|
+
'max_rms_amplitude',
|
|
60
|
+
'mean_rms_amplitude',
|
|
61
|
+
'min_rms_amplitude',
|
|
62
|
+
'peak_amplitude',
|
|
63
|
+
'rms_amplitude',
|
|
64
|
+
'short_time_energy',
|
|
65
|
+
'zero_crossing_rate',
|
|
66
|
+
'AudioVisualizer',
|
|
67
|
+
'create_analysis_dashboard',
|
|
68
|
+
'plot_dataset_overview',
|
|
69
|
+
]
|