neverlib 0.2.3__py3-none-any.whl → 0.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- neverlib/.history/Docs/audio_aug/test_snr_20250806011311.py +0 -0
- neverlib/.history/Docs/audio_aug/test_snr_20250806011331.py +75 -0
- neverlib/.history/Docs/audio_aug/test_snr_20250806011342.py +57 -0
- neverlib/.history/Docs/audio_aug/test_snr_20250806011352.py +57 -0
- neverlib/.history/Docs/audio_aug/test_snr_20250806011403.py +57 -0
- neverlib/.history/Docs/audio_aug/test_snr_20250806011413.py +57 -0
- neverlib/.history/Docs/audio_aug/test_snr_20250806011435.py +55 -0
- neverlib/.history/Docs/vad/1_20250810032405.py +0 -0
- neverlib/.history/Docs/vad/1_20250810032417.py +39 -0
- neverlib/.history/audio_aug/audio_aug_20250806010451.py +125 -0
- neverlib/.history/audio_aug/audio_aug_20250806010750.py +138 -0
- neverlib/.history/audio_aug/audio_aug_20250806010759.py +140 -0
- neverlib/.history/audio_aug/audio_aug_20250806010803.py +140 -0
- neverlib/.history/audio_aug/audio_aug_20250806010809.py +140 -0
- neverlib/.history/audio_aug/audio_aug_20250806011108.py +140 -0
- neverlib/.history/dataAnalyze/__init___20250806204125.py +14 -0
- neverlib/.history/dataAnalyze/__init___20250806204139.py +14 -0
- neverlib/.history/dataAnalyze/__init___20250806204159.py +14 -0
- neverlib/.history/filter/__init___20250820103351.py +70 -0
- neverlib/.history/filter/__init___20250821102348.py +70 -0
- neverlib/.history/filter/__init___20250821102405.py +14 -0
- neverlib/.history/filter/auto_eq/__init___20250819213121.py +36 -0
- neverlib/.history/filter/auto_eq/__init___20250821102241.py +36 -0
- neverlib/.history/filter/auto_eq/__init___20250821102259.py +36 -0
- neverlib/.history/filter/auto_eq/__init___20250821102307.py +36 -0
- neverlib/.history/filter/auto_eq/__init___20250821102310.py +36 -0
- neverlib/.history/filter/auto_eq/__init___20250821102318.py +36 -0
- neverlib/.history/filter/auto_eq/__init___20250821102507.py +36 -0
- neverlib/{filter/AudoEQ/auto_eq_de.py → .history/filter/auto_eq/de_eq_20250820103848.py} +1 -1
- neverlib/.history/filter/auto_eq/de_eq_20250821102422.py +360 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820140732.py +75 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820140745.py +75 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820140816.py +75 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820140938.py +77 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820141003.py +77 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820141006.py +77 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820141019.py +77 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820141049.py +77 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820141211.py +77 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820141227.py +77 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820141311.py +78 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820141340.py +78 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820141712.py +78 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820141733.py +78 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250820141755.py +78 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250821102434.py +76 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250821102500.py +76 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250821102502.py +76 -0
- neverlib/{filter/AudoEQ/auto_eq_ga_basic.py → .history/filter/auto_eq/ga_eq_basic_20250820102957.py} +1 -1
- neverlib/.history/filter/auto_eq/ga_eq_basic_20250820113054.py +380 -0
- neverlib/.history/filter/auto_eq/ga_eq_basic_20250820113150.py +380 -0
- neverlib/.history/filter/auto_eq/ga_eq_basic_20250820113520.py +385 -0
- neverlib/.history/filter/auto_eq/ga_eq_basic_20250820113525.py +385 -0
- neverlib/.history/filter/auto_eq/ga_eq_basic_20250821102212.py +385 -0
- neverlib/.history/metrics/dnsmos_20250806001612.py +160 -0
- neverlib/.history/metrics/dnsmos_20250815180659.py +160 -0
- neverlib/.history/metrics/dnsmos_20250815180701.py +158 -0
- neverlib/.history/metrics/dnsmos_20250815181321.py +154 -0
- neverlib/.history/metrics/dnsmos_20250815181327.py +154 -0
- neverlib/.history/metrics/dnsmos_20250815181331.py +154 -0
- neverlib/.history/metrics/dnsmos_20250815181620.py +154 -0
- neverlib/.history/metrics/dnsmos_20250815181631.py +154 -0
- neverlib/.history/metrics/dnsmos_20250815181742.py +154 -0
- neverlib/.history/metrics/dnsmos_20250815181824.py +153 -0
- neverlib/.history/metrics/dnsmos_20250815181834.py +153 -0
- neverlib/.history/metrics/dnsmos_20250815181922.py +153 -0
- neverlib/.history/metrics/dnsmos_20250815182011.py +147 -0
- neverlib/.history/metrics/dnsmos_20250815182036.py +144 -0
- neverlib/.history/metrics/dnsmos_20250815182936.py +143 -0
- neverlib/.history/metrics/dnsmos_20250815182942.py +143 -0
- neverlib/.history/metrics/dnsmos_20250815183032.py +137 -0
- neverlib/.history/metrics/dnsmos_20250815183101.py +144 -0
- neverlib/.history/metrics/dnsmos_20250815183121.py +144 -0
- neverlib/.history/metrics/dnsmos_20250815183123.py +143 -0
- neverlib/.history/metrics/dnsmos_20250815183214.py +143 -0
- neverlib/.history/metrics/dnsmos_20250815183240.py +143 -0
- neverlib/.history/metrics/dnsmos_20250815183248.py +144 -0
- neverlib/.history/metrics/dnsmos_20250815183407.py +142 -0
- neverlib/.history/metrics/dnsmos_20250815183409.py +142 -0
- neverlib/.history/metrics/dnsmos_20250815183431.py +142 -0
- neverlib/.history/metrics/dnsmos_20250815183507.py +140 -0
- neverlib/.history/metrics/dnsmos_20250815183513.py +139 -0
- neverlib/.history/metrics/dnsmos_20250815183618.py +139 -0
- neverlib/.history/metrics/dnsmos_20250815183709.py +140 -0
- neverlib/.history/metrics/dnsmos_20250815183756.py +137 -0
- neverlib/.history/metrics/dnsmos_20250815183815.py +128 -0
- neverlib/.history/metrics/dnsmos_20250815183827.py +129 -0
- neverlib/.history/metrics/dnsmos_20250815183913.py +117 -0
- neverlib/.history/metrics/dnsmos_20250815183914.py +117 -0
- neverlib/.history/metrics/dnsmos_20250815184003.py +118 -0
- neverlib/.history/metrics/dnsmos_20250815184040.py +118 -0
- neverlib/.history/metrics/dnsmos_20250815184049.py +118 -0
- neverlib/.history/metrics/dnsmos_20250815184104.py +117 -0
- neverlib/.history/metrics/dnsmos_20250815184200.py +117 -0
- neverlib/.history/metrics/lpc_lsp_metric_20250816015944.py +128 -0
- neverlib/.history/metrics/lpc_lsp_metric_20250816020142.py +128 -0
- neverlib/.history/metrics/lpc_lsp_metric_20250816020156.py +128 -0
- neverlib/.history/metrics/lpc_lsp_metric_20250816020554.py +130 -0
- neverlib/.history/metrics/lpc_lsp_metric_20250816020600.py +125 -0
- neverlib/.history/metrics/lpc_lsp_metric_20250816020631.py +120 -0
- neverlib/.history/metrics/lpc_lsp_metric_20250816020746.py +118 -0
- neverlib/.history/metrics/lpc_me_20250816013111.py +0 -0
- neverlib/.history/metrics/lpc_me_20250816013129.py +121 -0
- neverlib/.history/metrics/lpc_me_20250816015430.py +103 -0
- neverlib/.history/metrics/lpc_me_20250816015535.py +96 -0
- neverlib/.history/metrics/lpc_me_20250816015542.py +96 -0
- neverlib/.history/metrics/lpc_me_20250816015636.py +97 -0
- neverlib/.history/metrics/lpc_me_20250816015658.py +104 -0
- neverlib/.history/metrics/lpc_me_20250816015703.py +100 -0
- neverlib/.history/metrics/lpc_me_20250816015945.py +128 -0
- neverlib/.history/metrics/snr_20250806010538.py +177 -0
- neverlib/.history/metrics/snr_20250806211634.py +184 -0
- neverlib/.history/metrics/spec_20250805234209.py +45 -0
- neverlib/.history/metrics/spec_20250816135530.py +11 -0
- neverlib/.history/metrics/spec_20250816135654.py +16 -0
- neverlib/.history/metrics/spec_20250816135736.py +68 -0
- neverlib/.history/metrics/spec_20250816135904.py +75 -0
- neverlib/.history/metrics/spec_20250816135921.py +82 -0
- neverlib/.history/metrics/spec_20250816140111.py +82 -0
- neverlib/.history/metrics/spec_20250816140543.py +136 -0
- neverlib/.history/metrics/spec_20250816140559.py +172 -0
- neverlib/.history/metrics/spec_20250816140602.py +172 -0
- neverlib/.history/metrics/spec_20250816140608.py +172 -0
- neverlib/.history/metrics/spec_20250816140654.py +148 -0
- neverlib/.history/metrics/spec_20250816140705.py +144 -0
- neverlib/.history/metrics/spec_20250816140755.py +138 -0
- neverlib/.history/metrics/spec_20250816140823.py +170 -0
- neverlib/.history/metrics/spec_20250816140832.py +170 -0
- neverlib/.history/metrics/spec_20250816140833.py +170 -0
- neverlib/.history/metrics/spec_20250816140922.py +147 -0
- neverlib/.history/metrics/spec_20250816141148.py +107 -0
- neverlib/.history/metrics/spec_20250816141219.py +123 -0
- neverlib/.history/metrics/spec_20250816141732.py +178 -0
- neverlib/.history/metrics/spec_20250816141740.py +178 -0
- neverlib/.history/metrics/spec_20250816142030.py +178 -0
- neverlib/.history/metrics/spec_20250816142107.py +135 -0
- neverlib/.history/metrics/spec_20250816142126.py +135 -0
- neverlib/.history/metrics/spec_20250816142410.py +135 -0
- neverlib/.history/metrics/spec_20250816142415.py +136 -0
- neverlib/.history/metrics/spec_metric_20250816135156.py +0 -0
- neverlib/.history/metrics/spec_metric_20250816135226.py +5 -0
- neverlib/.history/metrics/spec_metric_20250816135227.py +10 -0
- neverlib/.history/metrics/spec_metric_20250816135306.py +15 -0
- neverlib/.history/metrics/spec_metric_20250816135442.py +31 -0
- neverlib/.history/metrics/spec_metric_20250816135448.py +31 -0
- neverlib/.history/metrics/spec_metric_20250816135520.py +29 -0
- neverlib/.history/metrics/spec_metric_20250816135537.py +63 -0
- neverlib/.history/metrics/spec_metric_20250816135653.py +65 -0
- neverlib/.history/vad/PreProcess_20250805234211.py +63 -0
- neverlib/.history/vad/PreProcess_20250809232455.py +63 -0
- neverlib/.history/vad/PreProcess_20250816020725.py +66 -0
- neverlib/.history/vad/VAD_Silero_20250805234211.py +50 -0
- neverlib/.history/vad/VAD_Silero_20250809232456.py +50 -0
- neverlib/.history/vad/VAD_WebRTC_20250805234211.py +61 -0
- neverlib/.history/vad/VAD_WebRTC_20250809232456.py +61 -0
- neverlib/.history/vad/VAD_funasr_20250805234211.py +54 -0
- neverlib/.history/vad/VAD_funasr_20250809232456.py +54 -0
- neverlib/.history/vad/VAD_vadlib_20250805234211.py +70 -0
- neverlib/.history/vad/VAD_vadlib_20250809232455.py +70 -0
- neverlib/.history/vad/VAD_whisper_20250805234211.py +55 -0
- neverlib/.history/vad/VAD_whisper_20250809232456.py +55 -0
- neverlib/.specstory/.what-is-this.md +69 -0
- neverlib/.specstory/history/2025-08-05_17-06Z-/350/277/231/344/270/200/346/255/245/347/232/204/347/233/256/347/232/204/346/230/257/344/273/200/344/271/210.md +424 -0
- neverlib/Docs/audio_aug/test_snr.py +55 -0
- neverlib/audio_aug/HarmonicDistortion.py +79 -0
- neverlib/audio_aug/TFDrop.py +41 -0
- neverlib/audio_aug/TFMask.py +56 -0
- neverlib/audio_aug/audio_aug.py +16 -1
- neverlib/audio_aug/clip_aug.py +41 -0
- neverlib/audio_aug/coder_aug.py +209 -0
- neverlib/audio_aug/coder_aug2.py +118 -0
- neverlib/audio_aug/loss_packet_aug.py +103 -0
- neverlib/audio_aug/quant_aug.py +78 -0
- neverlib/data_analyze/__init__.py +14 -0
- neverlib/filter/auto_eq/__init__.py +36 -0
- neverlib/filter/auto_eq/de_eq.py +360 -0
- neverlib/filter/auto_eq/freq_eq.py +76 -0
- neverlib/filter/{AudoEQ/auto_eq_ga_advanced.py → auto_eq/ga_eq_advanced.py} +1 -1
- neverlib/filter/auto_eq/ga_eq_basic.py +385 -0
- neverlib/metrics/dnsmos.py +58 -101
- neverlib/metrics/lpc_lsp.py +118 -0
- neverlib/metrics/snr.py +11 -4
- neverlib/metrics/spec.py +136 -45
- neverlib/utils/utils.py +17 -14
- neverlib/vad/PreProcess.py +5 -2
- neverlib/vad/VAD_Silero.py +1 -1
- neverlib/vad/VAD_WebRTC.py +1 -1
- neverlib/vad/VAD_funasr.py +1 -1
- neverlib/vad/VAD_vadlib.py +1 -1
- neverlib/vad/VAD_whisper.py +1 -1
- {neverlib-0.2.3.dist-info → neverlib-0.2.4.dist-info}/METADATA +1 -1
- neverlib-0.2.4.dist-info/RECORD +229 -0
- neverlib-0.2.3.dist-info/RECORD +0 -53
- /neverlib/{dataAnalyze/__init__.py → .history/dataAnalyze/__init___20250805234204.py} +0 -0
- /neverlib/{filter/AudoEQ/auto_eq_spectral_direct.py → .history/filter/auto_eq/freq_eq_20250805234206.py} +0 -0
- /neverlib/{dataAnalyze → data_analyze}/README.md +0 -0
- /neverlib/{dataAnalyze → data_analyze}/dataset_analyzer.py +0 -0
- /neverlib/{dataAnalyze → data_analyze}/quality_metrics.py +0 -0
- /neverlib/{dataAnalyze → data_analyze}/rms_distrubution.py +0 -0
- /neverlib/{dataAnalyze → data_analyze}/spectral_analysis.py +0 -0
- /neverlib/{dataAnalyze → data_analyze}/statistics.py +0 -0
- /neverlib/{dataAnalyze → data_analyze}/temporal_features.py +0 -0
- /neverlib/{dataAnalyze → data_analyze}/visualization.py +0 -0
- /neverlib/filter/{AudoEQ → auto_eq}/README.md +0 -0
- {neverlib-0.2.3.dist-info → neverlib-0.2.4.dist-info}/WHEEL +0 -0
- {neverlib-0.2.3.dist-info → neverlib-0.2.4.dist-info}/licenses/LICENSE +0 -0
- {neverlib-0.2.3.dist-info → neverlib-0.2.4.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
'''
|
|
2
|
+
Author: 凌逆战 | Never
|
|
3
|
+
Date: 2025-07-29 17:06:28
|
|
4
|
+
Description:
|
|
5
|
+
'''
|
|
6
|
+
import sys
|
|
7
|
+
sys.path.append("..")
|
|
8
|
+
import librosa
|
|
9
|
+
import numpy as np
|
|
10
|
+
import soundfile as sf
|
|
11
|
+
import random
|
|
12
|
+
from audiomentations import Compose, ClippingDistortion
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def clipping_aug(wav):
    """Simulate the limited dynamic range of a recording/processing device.

    The waveform is peak-normalised, amplified by a random gain drawn
    uniformly from [1.0, 2.0] and then hard-clipped to [-1.0, 1.0].

    Args:
        wav (np.ndarray): Input waveform.

    Returns:
        np.ndarray: Clipped waveform with the same shape as ``wav``. Empty or
        all-zero input is returned as (scaled) zeros instead of NaN.
    """
    wav = np.asarray(wav)

    # np.max() raises on an empty array, and dividing silence by a zero peak
    # yields NaN, so normalise only when there is a non-zero peak.
    peak = np.max(np.abs(wav)) if wav.size else 0.0
    if peak > 0:
        wav = wav / peak

    gain = random.uniform(1.0, 2)  # random over-drive factor
    return np.clip(wav * gain, -1.0, 1.0)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
if __name__ == "__main__":
    # Demo only: guarded so that importing this module does not try to read
    # a machine-specific file.
    wav_path = "/data/never/Desktop/kws_train/QA/wav_data/TIMIT.wav"
    wav, wav_sr = sf.read(wav_path, always_2d=True)
    wav = wav.T
    print(wav.shape)

    # clipping_aug() takes only the waveform: it peak-normalises, applies a
    # random gain in [1, 2] and hard-clips to [-1, 1].
    y_clipped = clipping_aug(wav)

    # Save the augmented audio (transpose back to (samples, channels)).
    output_path = './augmented_clipped.wav'
    sf.write(output_path, y_clipped.T, wav_sr)

    print(f"削波增强完成!增强后的音频已保存至: {output_path}")
|
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
'''
|
|
2
|
+
Author: 凌逆战 | Never
|
|
3
|
+
Date: 2025-07-29 16:52:10
|
|
4
|
+
Description:
|
|
5
|
+
'''
|
|
6
|
+
"""
|
|
7
|
+
语音编码器数据增强
|
|
8
|
+
MP3 (MPEG-1 Audio Layer III)
|
|
9
|
+
- 主要用途:音乐分发、播客。互联网音频的“元老”和事实标准。
|
|
10
|
+
- 压缩特性:在中低码率下, 高频部分可能会有“嗖嗖”声或模糊感 (swishing artifacts)。
|
|
11
|
+
- 数据增强目的:模拟通用网络音频压缩。
|
|
12
|
+
|
|
13
|
+
AAC (Advanced Audio Coding)
|
|
14
|
+
- 主要用途:流媒体、视频文件、现代设备。被认为是 MP3 的继任者。
|
|
15
|
+
- 压缩特性:在同等码率下, 通常比 MP3 保留更多高频细节, 音质更好。
|
|
16
|
+
- 数据增强目的:模拟现代流媒体和移动设备上的音频压缩。
|
|
17
|
+
|
|
18
|
+
AMR (Adaptive Multi-Rate)
|
|
19
|
+
- 主要用途:语音通话、移动通信。专为语音优化。
|
|
20
|
+
- 压缩特性:严格为语音设计, 会滤除大部分非语音频率(如音乐), 导致音乐听起来“电话音”效果。
|
|
21
|
+
- 数据增强目的:固定采样率:AMR-NB (窄带) 为 8kHz, AMR-WB (宽带) 为 16kHz。这一点至关重要!
|
|
22
|
+
"""
|
|
23
|
+
import numpy as np
|
|
24
|
+
import soundfile as sf
|
|
25
|
+
from audiomentations import Mp3Compression
|
|
26
|
+
import av
|
|
27
|
+
import random
|
|
28
|
+
|
|
29
|
+
# mp3编解码数据增强
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def mp3_aug(wav, sr):
    """Encode ``wav`` as MP3 and write it to 'audio.mp3' in the working directory.

    Args:
        wav: Audio samples accepted by ``soundfile.write``.
        sr: Sample rate in Hz.

    Returns:
        The return value of ``soundfile.write`` (None).
    """
    # soundfile.write() has no ``bitrate`` keyword, so passing one raises
    # TypeError before anything is written; the encoder default is used.
    # NOTE(review): MP3 output depends on the installed libsndfile build.
    return sf.write('audio.mp3', wav, sr, format='MP3')
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def vorbis_aug(wav, sr):
    """Write ``wav`` to 'audio.ogg' in the working directory using the Vorbis subtype.

    Args:
        wav: Audio samples accepted by ``soundfile.write``.
        sr: Sample rate in Hz.
    """
    ogg_path = 'audio.ogg'
    sf.write(ogg_path, wav, sr, subtype='VORBIS')
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def flac_aug(wav, sr):
    """Encode ``wav`` as FLAC and write it to 'audio.flac' in the working directory.

    Args:
        wav: Audio samples accepted by ``soundfile.write``.
        sr: Sample rate in Hz.
    """
    # The target file must be the first argument; without it ``wav`` was
    # interpreted as the file and ``sr`` as the data. The file name follows
    # the 'audio.<ext>' convention used by the sibling helpers.
    sf.write('audio.flac', wav, sr, format='FLAC')
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def opus_aug_save(wav: np.ndarray, sr: int, output_filepath: str):
    """Opus-encode ``wav`` at a randomly chosen bitrate and save it with PyAV.

    Args:
        wav: Mono audio of shape (n_samples,) or audio of shape
            (n_samples, n_channels) with one or two channels.
        sr: Sample rate in Hz.
        output_filepath: Target path. Every '.opus' occurrence in it is
            replaced by '_<bitrate>k.opus'; a path without '.opus' is used
            unchanged.

    Raises:
        ValueError: If ``wav`` has more than two channels.
    """
    # Pick a random bitrate (kbps) and embed it in the file name.
    bitrate_kbps = random.choice([24, 32, 48, 64, 96, 128])
    output_filepath_with_bitrate = output_filepath.replace('.opus', f'_{bitrate_kbps}k.opus')

    print(f" -> Saving Opus augmented version to: {output_filepath_with_bitrate} (Bitrate: {bitrate_kbps}k)")

    wav = np.asarray(wav)
    if wav.ndim == 1:
        wav = wav.reshape(-1, 1)

    # Derive the layout from the real channel count; a 2-D array holding a
    # single channel (e.g. sf.read(..., always_2d=True)) is still mono.
    n_channels = wav.shape[1]
    if n_channels == 1:
        layout = 'mono'
    elif n_channels == 2:
        layout = 'stereo'
    else:
        raise ValueError(f"Unsupported channel count: {n_channels}. Must be 1 or 2.")

    # Planar float32 of shape (n_channels, n_samples). The dtype conversion is
    # applied to the array that is actually handed to PyAV.
    wav_ch_first = np.ascontiguousarray(wav.T, dtype=np.float32)

    with av.open(output_filepath_with_bitrate, mode='w') as container:
        stream = container.add_stream('libopus', rate=sr, layout=layout)
        stream.bit_rate = bitrate_kbps * 1000

        # 'fltp' is the planar float format matching a channels-first array;
        # the layout is passed explicitly because from_ndarray defaults to
        # 'stereo'.
        # NOTE(review): relies on PyAV converting the frame to the encoder's
        # own sample format during encode() - confirm for the installed version.
        frame = av.AudioFrame.from_ndarray(wav_ch_first, format='fltp', layout=layout)
        frame.sample_rate = sr

        # Encode and write the packets.
        for packet in stream.encode(frame):
            container.mux(packet)
        # Flush the Opus encoder.
        for packet in stream.encode(None):
            container.mux(packet)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def aac_aug_save(wav: np.ndarray, sr: int, output_filepath: str):
    """AAC-encode ``wav`` at a randomly chosen bitrate and save it with PyAV.

    Args:
        wav: Mono audio of shape (n_samples,) or audio of shape
            (n_samples, n_channels) with one or two channels.
        sr: Sample rate in Hz.
        output_filepath: Target path. Every '.aac' occurrence in it is
            replaced by '_<bitrate>k.m4a'; a path without '.aac' is used
            unchanged.

    Raises:
        ValueError: If ``wav`` has more than two channels.
    """
    # Pick a random bitrate (kbps); .m4a is the usual suffix for AAC in MP4.
    bitrate_kbps = random.choice([48, 64, 96, 128, 160, 192])
    output_filepath_with_bitrate = output_filepath.replace('.aac', f'_{bitrate_kbps}k.m4a')

    print(f" -> Saving AAC augmented version to: {output_filepath_with_bitrate} (Bitrate: {bitrate_kbps}k)")

    wav = np.asarray(wav)
    if wav.ndim == 1:
        wav = wav.reshape(-1, 1)

    # Derive the layout from the real channel count; a 2-D array holding a
    # single channel (e.g. sf.read(..., always_2d=True)) is still mono.
    n_channels = wav.shape[1]
    if n_channels == 1:
        layout = 'mono'
    elif n_channels == 2:
        layout = 'stereo'
    else:
        raise ValueError(f"Unsupported channel count: {n_channels}. Must be 1 or 2.")

    # Planar float32 of shape (n_channels, n_samples). The dtype conversion is
    # applied to the array that is actually handed to PyAV.
    wav_ch_first = np.ascontiguousarray(wav.T, dtype=np.float32)

    # format='mp4' creates an .m4a/.mp4 container ('adts' would be a raw AAC stream).
    with av.open(output_filepath_with_bitrate, mode='w', format='mp4') as container:
        # NOTE(review): 'libfdk_aac' must be present in the FFmpeg build PyAV
        # links against - confirm it is available in the target environment.
        stream = container.add_stream('libfdk_aac', rate=sr, layout=layout)
        stream.bit_rate = bitrate_kbps * 1000

        # 'fltp' is the planar float format matching a channels-first array;
        # the layout is passed explicitly because from_ndarray defaults to
        # 'stereo'.
        # NOTE(review): relies on PyAV converting the frame to the encoder's
        # own sample format during encode() - confirm for the installed version.
        frame = av.AudioFrame.from_ndarray(wav_ch_first, format='fltp', layout=layout)
        frame.sample_rate = sr

        for packet in stream.encode(frame):
            container.mux(packet)
        # Flush the AAC encoder.
        for packet in stream.encode(None):
            container.mux(packet)
    print(f" ... success.")
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def flac_encode_save(
    wav: np.ndarray,
    sr: int,
    output_filepath: str,
    compression_level: int = 5,
    bits_per_sample=None
):
    """Encode a NumPy audio array as FLAC and write it to ``output_filepath``.

    Args:
        wav: Input audio. Float data is expected in [-1.0, 1.0] (values
            outside are clipped); int16 and int32 data are also accepted.
        sr: Sample rate in Hz.
        output_filepath: Path of the .flac file to write.
        compression_level: FLAC compression level, forwarded to the encoder
            (the original documentation gives the range as 0-8).
        bits_per_sample: 16, 24 or 32. ``None`` infers 32 for int32 input and
            16 for everything else. A value of 32 is encoded as 24-bit.

    Raises:
        ValueError: If ``bits_per_sample`` is not 16, 24 or 32.
    """
    # --- 1. Resolve the bit depth and convert the samples ---
    if bits_per_sample is None:
        bits_per_sample = 32 if wav.dtype == np.int32 else 16

    if bits_per_sample == 16:
        if wav.dtype == np.int16:
            wav_int = wav
        else:
            print(" ... converting audio to int16 for encoding.")
            if wav.dtype == np.int32:
                # assumes full-scale 32-bit PCM - keep the top 16 bits
                wav_int = (wav >> 16).astype(np.int16)
            else:
                # Clip first: astype() wraps around on overflow, which would
                # turn samples slightly above 1.0 into large negative values.
                wav_int = (np.clip(wav, -1.0, 1.0) * 32767).astype(np.int16)
    elif bits_per_sample == 24 or bits_per_sample == 32:
        source_is_32bit = bits_per_sample == 32
        bits_per_sample = 24  # 32-bit requests are stored as 24-bit FLAC
        if wav.dtype == np.int32:
            # Explicit 24-bit int32 data is taken as already in 24-bit range;
            # 32-bit data (assumed full-scale) is reduced to 24 bits.
            wav_int = wav >> 8 if source_is_32bit else wav
        else:
            print(" ... converting audio to int32 (for 24-bit FLAC).")
            if wav.dtype == np.int16:
                # widen 16-bit PCM into the 24-bit range
                wav_int = wav.astype(np.int32) << 8
            else:
                wav_int = (np.clip(wav, -1.0, 1.0) * 8388607).astype(np.int32)
    else:
        raise ValueError(f"Unsupported bits_per_sample: {bits_per_sample}. Must be 16, 24, or 32.")

    # --- 2. Create the encoder ---
    # NOTE(review): ``Encoder`` is not imported anywhere in the visible part
    # of this file; this call raises NameError until the intended encoder
    # class is imported. Confirm its constructor arguments at the same time.
    encoder = Encoder(
        sample_rate=sr,
        bits_per_sample=bits_per_sample,
        compression_level=compression_level
    )

    # --- 3. Encode everything in one call ---
    # presumably process() returns the encoded bytes - verify against the
    # encoder's API.
    encoded_bytes = encoder.process(wav_int)

    # --- 4. Write the bytes to disk ---
    with open(output_filepath, 'wb') as f:
        f.write(encoded_bytes)

    print(f" ... success.")
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
# AMR编解码数据增强
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def amr_nb_aug(samples, sample_rate):
    """Write ``samples`` to 'audio.amr' in the working directory.

    Args:
        samples: Audio samples accepted by ``soundfile.write``.
        sample_rate: Sample rate in Hz.

    Returns:
        The return value of ``soundfile.write`` (None).
    """
    # Use this function's own parameters (the body referred to the undefined
    # names ``wav``/``sr``) and drop ``bitrate``, which soundfile.write()
    # does not accept.
    # NOTE(review): confirm that the installed libsndfile offers an 'AMR'
    # format at all; if not, this needs a different encoder backend.
    return sf.write('audio.amr', samples, sample_rate, format='AMR')
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def amr_wb_aug(wav, sr):
    """Write ``wav`` to 'audio.amr' in the working directory.

    Args:
        wav: Audio samples accepted by ``soundfile.write``.
        sr: Sample rate in Hz.

    Returns:
        The return value of ``soundfile.write`` (None).
    """
    # soundfile.write() has no ``bitrate`` keyword, so passing one raises
    # TypeError before anything is written.
    # NOTE(review): confirm that the installed libsndfile offers an 'AMR'
    # format at all; if not, this needs a different encoder backend.
    return sf.write('audio.amr', wav, sr, format='AMR')
|
|
198
|
+
|
|
199
|
+
# Opus 编解码数据增强
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
if __name__ == "__main__":
    wav_path = "/data/never/Desktop/kws_train/QA/wav_data/TIMIT.wav"
    wav, sr = sf.read(wav_path, always_2d=True)
    # The Ogg/Vorbis helper in this module is named vorbis_aug; ``ogg_aug``
    # does not exist and raised NameError.
    vorbis_aug(wav, sr)
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
'''
|
|
2
|
+
Author: 凌逆战 | Never
|
|
3
|
+
Date: 2025-07-29 17:57:26
|
|
4
|
+
Description:
|
|
5
|
+
'''
|
|
6
|
+
import numpy as np
|
|
7
|
+
import librosa
|
|
8
|
+
import soundfile as sf
|
|
9
|
+
import subprocess
|
|
10
|
+
import os
|
|
11
|
+
import random
|
|
12
|
+
import io
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def check_codec_available(codec):
    """Return True if ``ffmpeg -encoders`` lists ``codec``.

    ``codec`` matches when it appears as a whole word in the listing, which
    covers both an encoder name and a trailing "(codec <name>)" annotation.

    Args:
        codec (str): Encoder or codec name, e.g. 'libopus', 'aac', 'amr_nb'.

    Returns:
        bool: False when ``codec`` is empty, FFmpeg cannot be run, or the
        name is not listed.
    """
    if not codec:
        return False
    try:
        result = subprocess.run(['ffmpeg', '-encoders'], capture_output=True, text=True)
    except (OSError, ValueError, subprocess.SubprocessError):
        # ffmpeg missing / not executable / undecodable output. A bare
        # ``except`` would also swallow KeyboardInterrupt and SystemExit.
        return False

    # Compare whole tokens instead of a raw substring test so that a short or
    # partial string cannot match inside an unrelated name.
    for line in result.stdout.splitlines():
        if codec in (token.strip('()') for token in line.split()):
            return True
    return False
|
|
22
|
+
|
|
23
|
+
def apply_codec_distortion(wav, sr, codec='libopus', bitrate='24k'):
    """Round-trip audio through an FFmpeg encoder to simulate lossy-codec distortion.

    Args:
        wav (np.ndarray): Input waveform.
        sr (int): Sample rate in Hz.
        codec (str): FFmpeg encoder name, e.g. 'aac', 'libopus', 'amr_nb',
            'amr_wb', 'mp3'.
        bitrate (str): Target bitrate in FFmpeg notation, e.g. '64k', '24k',
            '12.2k'.

    Returns:
        np.ndarray: The decoded, distorted waveform, resampled back to ``sr``.
        The unmodified input is returned when the codec is unavailable or any
        processing step fails.
    """
    import tempfile  # local import: not among this file's top-level imports

    # Skip early when FFmpeg does not provide the encoder.
    if not check_codec_available(codec):
        print(f"编解码器 {codec} 不可用, 跳过处理...")
        return wav

    # Pick a container extension FFmpeg can map to a muxer.
    if codec == 'libopus':
        output_ext = '.opus'
    elif codec == 'aac':
        output_ext = '.m4a'  # AAC is normally wrapped in an MP4 container
    elif codec in ['amr_nb', 'amr_wb']:
        output_ext = '.amr'
    else:
        output_ext = f'.{codec.split("_")[0]}'

    try:
        # A private temporary directory avoids name collisions between
        # concurrent calls and is removed automatically, even on failure.
        with tempfile.TemporaryDirectory(prefix='codec_aug_') as tmp_dir:
            input_filename = os.path.join(tmp_dir, 'input.wav')
            output_filename = os.path.join(tmp_dir, f'encoded{output_ext}')
            wav_output = os.path.join(tmp_dir, 'decoded.wav')

            # 1. Dump the array to a temporary WAV file.
            sf.write(input_filename, wav, sr)

            # 2. Build the encode command.
            command = ['ffmpeg', '-y', '-i', input_filename, '-c:a', codec, '-b:a', bitrate]

            # 3. AMR runs at a fixed rate: 8 kHz narrow-band, 16 kHz wide-band.
            if codec == 'amr_nb':
                command.extend(['-ar', '8000'])
            elif codec == 'amr_wb':
                command.extend(['-ar', '16000'])

            command.append(output_filename)

            # Run the encoder with its console output captured.
            subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

            # 4. Decode back to WAV at the caller's sample rate; some codecs
            #    encode/decode at a different rate than the input.
            subprocess.run(['ffmpeg', '-y', '-i', output_filename, '-ar', str(int(sr)), wav_output],
                           check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

            # 5. Read the distorted audio back.
            return sf.read(wav_output)[0]

    except Exception as e:
        # Deliberate best effort: augmentation failures fall back to the input.
        print(f"FFmpeg 处理失败: {e}")
        return wav
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
# --- 使用示例 ---
|
|
96
|
+
if __name__ == "__main__":
    # Usage example. Guarded so that importing this module does not read a
    # machine-specific file or invoke FFmpeg.
    wav_path = "/data/never/Desktop/kws_train/QA/wav_data/TIMIT.wav"
    wav, wav_sr = sf.read(wav_path, always_2d=True)

    # 1. Opus (common for VoIP / WebRTC)
    print("应用 Opus 编解码器失真...")
    opus_wav = apply_codec_distortion(wav, wav_sr, codec='libopus', bitrate='24k')
    sf.write('augmented_opus.wav', opus_wav, wav_sr)

    # 2. AAC (common for streaming / Apple devices)
    print("应用 AAC 编解码器失真...")
    aac_wav = apply_codec_distortion(wav, wav_sr, codec='aac', bitrate='64k')
    sf.write('augmented_aac.wav', aac_wav, wav_sr)

    # 3. AMR-NB (legacy mobile telephony); it only supports a fixed set of bitrates.
    amr_bitrates = ['4.75k', '5.15k', '5.9k', '6.7k', '7.4k', '7.95k', '10.2k', '12.2k']
    chosen_amr_bitrate = random.choice(amr_bitrates)
    print(f"应用 AMR-NB @ {chosen_amr_bitrate} 编解码器失真...")
    amr_wav = apply_codec_distortion(wav, wav_sr, codec='amr_nb', bitrate=chosen_amr_bitrate)
    # The result is written with the original sample rate.
    sf.write('augmented_amr.wav', amr_wav, wav_sr)

    print("所有编解码器增强完成!")
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
'''
|
|
2
|
+
Author: 凌逆战 | Never
|
|
3
|
+
Date: 2025-07-29 16:28:23
|
|
4
|
+
Description: 丢包数据增强
|
|
5
|
+
“置零” vs “缺失”:两种不同的模拟思路
|
|
6
|
+
结论:对于音质修复, 强烈推荐使用“置零”法, 而不是“删除”法。
|
|
7
|
+
|
|
8
|
+
理由如下:
|
|
9
|
+
|
|
10
|
+
- 保持时序对齐 (Temporal Alignment): 在音质修复任务中, 模型需要一个一一对应的关系。输入 damaged_audio 的第 t 秒, 应该对应输出 repaired_audio 的第 t 秒, 也对应原始 original_audio 的第 t 秒。
|
|
11
|
+
如果使用“删除”法, 输入音频变短, 这种对齐关系就被破坏了, 模型无法学习 (损坏的t时刻) -> (修复的t时刻) 的映射。
|
|
12
|
+
- 简化模型训练: 输入和输出的长度保持一致, 意味着你可以直接使用标准的模型架构(如 U-Net), 而不需要处理复杂的可变长度序列问题。
|
|
13
|
+
- 更贴近修复任务的本质: 音质修复, 特别是丢包补偿 (Packet Loss Concealment, PLC), 其任务本质是**“根据上下文, 猜测并填充一段丢失的音频”**。
|
|
14
|
+
|
|
15
|
+
“置零”法完美地创造了这样一个场景:模型看到了上下文, 也看到了一个明确的“空白”(零区域), 它的任务就是把这个空白填上。
|
|
16
|
+
“删除”法则改变了问题的性质, 变成了“检测不连续点并试图将其平滑化”, 这与 PLC 的目标不完全一致。
|
|
17
|
+
|
|
18
|
+
“置零”是在深度学习框架下对“真正丢弃”问题的一种高效、可解的数学建模。 我们牺牲了一点点物理上的真实性, 换来了模型训练的可行性和高效性。
|
|
19
|
+
'''
|
|
20
|
+
import numpy as np
|
|
21
|
+
import random
|
|
22
|
+
import soundfile as sf
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def simulate_packet_loss_vectorized(
    wav: np.ndarray,
    sample_rate: int,
    packet_duration_ms: int = 10,
    loss_rate: float = 0.05,
    burst_prob: float = 0.2
) -> np.ndarray:
    """Simulate network packet loss by zeroing whole packets (vectorised).

    The signal is cut into packets of ``packet_duration_ms`` along its first
    axis. Each packet is dropped when its random draw is below ``loss_rate``;
    a packet directly after such a drop is also dropped when its draw is
    below ``burst_prob``. Dropped packets are set to zero, so the output
    keeps the input length. A trailing partial packet is never dropped.

    Args:
        wav: Original waveform.
        sample_rate: Sample rate in Hz.
        packet_duration_ms: Packet length in milliseconds (the original notes
            suggest drawing it from ``np.arange(10, 60, 5)``).
        loss_rate: Base packet-loss probability.
        burst_prob: Loss probability for a packet following a base loss.

    Returns:
        A damaged copy of ``wav`` with the same shape; the input is not modified.
    """
    # Work on a copy so the caller's array stays untouched.
    damaged_wav = wav.copy()

    packet_size = int(packet_duration_ms * sample_rate / 1000)
    if packet_size <= 0:
        return damaged_wav

    num_packets = len(damaged_wav) // packet_size
    if num_packets == 0:
        # Shorter than one packet: nothing can be dropped. Without this guard
        # the ``prev_lost_mask[0]`` assignment below raises IndexError.
        return damaged_wav

    # One random draw per packet, shared by the base and the burst decision.
    rand_nums = np.random.rand(num_packets)
    loss_mask = rand_nums < loss_rate

    # Shift the base mask right by one packet to get "previous packet lost";
    # the first packet has no predecessor.
    prev_lost_mask = np.roll(loss_mask, 1)
    prev_lost_mask[0] = False

    # A packet is also lost if its predecessor was lost by the base rule and
    # its own draw falls below burst_prob.
    burst_loss_candidates = rand_nums < burst_prob
    loss_mask = np.logical_or(loss_mask, np.logical_and(prev_lost_mask, burst_loss_candidates))

    # Expand the per-packet mask to per-sample, e.g. [F, T] -> [F,F,F, T,T,T].
    samples_mask = np.repeat(loss_mask, packet_size)

    # Zero the lost samples; only whole packets are covered.
    valid_length = num_packets * packet_size
    damaged_wav[:valid_length][samples_mask] = 0

    return damaged_wav
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
if __name__ == "__main__":
    # Demo only: guarded so that importing this module does not write a file.
    # Generate white noise.
    white_noise = np.random.randn(100000).astype(np.float32)

    # Damage it with simulated packet loss.
    damaged_audio = simulate_packet_loss_vectorized(white_noise, 16000, loss_rate=0.1, burst_prob=0.5)

    # Save the result.
    sf.write("damaged_audio.wav", damaged_audio, 16000)
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
'''
|
|
2
|
+
Author: 凌逆战 | Never
|
|
3
|
+
Date: 2025-03-26 22:13:21
|
|
4
|
+
Description:
|
|
5
|
+
'''
|
|
6
|
+
import os
|
|
7
|
+
import sys
|
|
8
|
+
import numpy as np
|
|
9
|
+
import soundfile as sf
|
|
10
|
+
import librosa
|
|
11
|
+
|
|
12
|
+
print(librosa.__version__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def apply_uniform_quantization(wav, bit_depth=8):
    """Apply uniform quantisation to simulate a lower bit depth.

    Args:
        wav (np.ndarray): Input waveform; values should lie in [-1.0, 1.0].
        bit_depth (int): Bit depth to simulate; must be at least 1.

    Returns:
        np.ndarray: Quantised waveform as float32.

    Raises:
        ValueError: If ``bit_depth`` is smaller than 1 (fewer than two levels
            would make the level spacing a division by zero).
    """
    if bit_depth < 1:
        raise ValueError(f"bit_depth must be >= 1, got {bit_depth}")

    # Number of quantisation levels.
    num_levels = 2 ** bit_depth

    # 1. Map [-1, 1] onto [0, num_levels - 1].
    scaled_wav = (wav + 1.0) / 2.0 * (num_levels - 1)

    # 2. Snap to the nearest integer level.
    quantized_levels = np.round(scaled_wav)

    # 3. Map the integer levels back to [-1, 1].
    quantized_wav = (quantized_levels / (num_levels - 1) * 2.0) - 1.0

    return quantized_wav.astype(np.float32)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def apply_mulaw_quantization(wav, bit_depth=8):
    """Simulate mu-law quantisation distortion via librosa's mu_compress / mu_expand.

    Args:
        wav (np.ndarray): Input audio.
        bit_depth (int): Bit depth to simulate.

    Returns:
        The signal after mu-law compression with quantisation followed by
        mu-law expansion.
    """
    # mu fixes the number of quantisation levels (mu + 1).
    n_steps = 2**bit_depth - 1

    # Compress with quantize=True (the lossy step), then expand again.
    return librosa.mu_expand(
        librosa.mu_compress(wav, mu=n_steps, quantize=True),
        mu=n_steps,
    )
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
# --- 使用示例 ---
|
|
65
|
+
if __name__ == "__main__":
    # Usage example. Guarded so that importing this module does not read a
    # machine-specific file.
    wav_path = "/data/never/Desktop/kws_train/QA/wav_data/TIMIT.wav"
    wav, wav_sr = sf.read(wav_path, always_2d=True)

    # Simulate an old 8-bit digital audio device.
    y_quantized_8bit = apply_uniform_quantization(wav, bit_depth=8)
    sf.write('augmented_quantized_8bit.wav', y_quantized_8bit, wav_sr)

    # Simulate an even coarser 4-bit device.
    y_quantized_4bit = apply_uniform_quantization(wav, bit_depth=4)
    sf.write('augmented_quantized_4bit.wav', y_quantized_4bit, wav_sr)

    # 8-bit mu-law round trip.
    y_q = apply_mulaw_quantization(wav, bit_depth=8)
    sf.write('augmented_mulaw_8bit.wav', y_q, wav_sr)
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
'''
|
|
2
|
+
Author: 凌逆战 | Never
|
|
3
|
+
Date: 2025-08-06 00:56:39
|
|
4
|
+
Description:
|
|
5
|
+
'''
|
|
6
|
+
"""
|
|
7
|
+
音频数据分析模块
|
|
8
|
+
Audio Data Analysis Module
|
|
9
|
+
|
|
10
|
+
提供完整的音频数据分析功能, 包括特征提取、质量评估、统计分析和可视化等。
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
# 基础工具
|
|
14
|
+
# Relative import: the package directory is named data_analyze, so the old
# absolute path neverlib.dataAnalyze no longer resolves.
from .temporal_features import dB, peak_amplitude, rms_amplitude
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
'''
|
|
2
|
+
Author: 凌逆战 | Never
|
|
3
|
+
Date: 2025-08-19 21:26:54
|
|
4
|
+
Description:
|
|
5
|
+
auto_eq - 自动EQ补偿模块
|
|
6
|
+
Author: 凌逆战 | Never
|
|
7
|
+
|
|
8
|
+
该模块提供多种自动EQ补偿方法:
|
|
9
|
+
- 频谱直接补偿 (freq_eq)
|
|
10
|
+
- 差分进化优化 (de_eq)
|
|
11
|
+
- 遗传算法基础版 (ga_eq_basic)
|
|
12
|
+
- 遗传算法高级版 (ga_eq_advanced)
|
|
13
|
+
'''
|
|
14
|
+
|
|
15
|
+
# 频谱直接补偿方法
|
|
16
|
+
from .freq_eq import get_freq_eq
|
|
17
|
+
|
|
18
|
+
# 差分进化优化方法
|
|
19
|
+
# from .de_eq import (
|
|
20
|
+
# get_filter_function,
|
|
21
|
+
# match_frequency_response,
|
|
22
|
+
# plot_spectra_comparison
|
|
23
|
+
# )
|
|
24
|
+
|
|
25
|
+
# 遗传算法基础版
|
|
26
|
+
# from .ga_eq_basic import (
|
|
27
|
+
# individual_creator,
|
|
28
|
+
# get_magnitude_spectrum_db,
|
|
29
|
+
# get_single_filter_freq_response_db_from_coeffs,
|
|
30
|
+
# get_combined_eq_response_db,
|
|
31
|
+
# evaluate_individual,
|
|
32
|
+
# custom_mutate,
|
|
33
|
+
# )
|
|
34
|
+
|
|
35
|
+
# 遗传算法高级版
|
|
36
|
+
# from .ga_eq_advanced import EQOptimizer
|