neverlib 0.2.6__py3-none-any.whl → 0.2.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- neverlib/.claude/settings.local.json +9 -0
- neverlib/Docs/audio_aug/test_volume.ipynb +416 -0
- neverlib/Docs/audio_aug_test/test_volume.ipynb +289 -0
- neverlib/Docs/filter/biquad.ipynb +129 -0
- neverlib/Docs/filter/filter_family.ipynb +450 -0
- neverlib/Docs/filter/highpass.ipynb +139 -0
- neverlib/Docs/filter/scipy_filter_family.ipynb +110 -0
- neverlib/Docs/vad/VAD_Energy.ipynb +167 -0
- neverlib/Docs/vad/VAD_Silero.ipynb +325 -0
- neverlib/Docs/vad/VAD_WebRTC.ipynb +189 -0
- neverlib/Docs/vad/VAD_funasr.ipynb +192 -0
- neverlib/Docs/vad/VAD_rvADfast.ipynb +162 -0
- neverlib/Docs/vad/VAD_statistics.ipynb +532 -0
- neverlib/Docs/vad/VAD_tenVAD.ipynb +292 -0
- neverlib/Docs/vad/VAD_vadlib.ipynb +168 -0
- neverlib/Docs/vad/VAD_whisper.ipynb +404 -0
- neverlib/QA/gen_init.py +218 -0
- neverlib/QA/get_fun.py +19 -0
- neverlib/__init__.py +40 -4
- neverlib/audio_aug/HarmonicDistortion.py +19 -13
- neverlib/audio_aug/__init__.py +82 -12
- neverlib/audio_aug/audio_aug.py +19 -14
- neverlib/audio_aug/clip_aug.py +15 -18
- neverlib/audio_aug/coder_aug.py +44 -24
- neverlib/audio_aug/coder_aug2.py +54 -37
- neverlib/audio_aug/loss_packet_aug.py +7 -7
- neverlib/audio_aug/quant_aug.py +19 -17
- neverlib/data/000_short_enhance.wav +0 -0
- neverlib/data/3956_speech.wav +0 -0
- neverlib/data/3956_sweep.wav +0 -0
- neverlib/data/vad_example.wav +0 -0
- neverlib/data/white.wav +0 -0
- neverlib/data/white_EQ.wav +0 -0
- neverlib/data/white_matched.wav +0 -0
- neverlib/data_analyze/__init__.py +69 -20
- neverlib/data_analyze/dataset_analyzer.py +109 -114
- neverlib/data_analyze/quality_metrics.py +87 -89
- neverlib/data_analyze/rms_distrubution.py +23 -42
- neverlib/data_analyze/spectral_analysis.py +43 -46
- neverlib/data_analyze/statistics.py +76 -76
- neverlib/data_analyze/temporal_features.py +15 -6
- neverlib/data_analyze/visualization.py +208 -144
- neverlib/filter/__init__.py +40 -20
- neverlib/filter/auto_eq/__init__.py +50 -31
- neverlib/filter/auto_eq/de_eq.py +0 -2
- neverlib/filter/common.py +24 -5
- neverlib/metrics/DNSMOS/bak_ovr.onnx +0 -0
- neverlib/metrics/DNSMOS/model_v8.onnx +0 -0
- neverlib/metrics/DNSMOS/sig.onnx +0 -0
- neverlib/metrics/DNSMOS/sig_bak_ovr.onnx +0 -0
- neverlib/metrics/__init__.py +59 -0
- neverlib/metrics/dnsmos.py +4 -15
- neverlib/metrics/pDNSMOS/sig_bak_ovr.onnx +0 -0
- neverlib/metrics/pesq_c/PESQ +0 -0
- neverlib/metrics/pesq_c/dsp.c +553 -0
- neverlib/metrics/pesq_c/dsp.h +138 -0
- neverlib/metrics/pesq_c/pesq.h +294 -0
- neverlib/metrics/pesq_c/pesqdsp.c +1047 -0
- neverlib/metrics/pesq_c/pesqio.c +392 -0
- neverlib/metrics/pesq_c/pesqmain.c +610 -0
- neverlib/metrics/pesq_c/pesqmod.c +1417 -0
- neverlib/metrics/pesq_c/pesqpar.h +297 -0
- neverlib/metrics/snr.py +5 -1
- neverlib/metrics/spec.py +31 -21
- neverlib/metrics/test_pesq.py +0 -4
- neverlib/tests/__init__.py +33 -1
- neverlib/tests/test_imports.py +19 -0
- neverlib/utils/__init__.py +71 -15
- neverlib/utils/audio_split.py +6 -1
- neverlib/utils/checkGPU.py +17 -9
- neverlib/utils/lazy_expose.py +29 -0
- neverlib/utils/utils.py +55 -12
- neverlib/vad/PreProcess.py +66 -66
- neverlib/vad/__init__.py +71 -25
- neverlib/vad/class_get_speech.py +1 -1
- neverlib/vad/class_vad.py +3 -3
- neverlib/vad/img.png +0 -0
- {neverlib-0.2.6.dist-info → neverlib-0.2.8.dist-info}/METADATA +1 -1
- {neverlib-0.2.6.dist-info → neverlib-0.2.8.dist-info}/RECORD +82 -39
- {neverlib-0.2.6.dist-info → neverlib-0.2.8.dist-info}/WHEEL +0 -0
- {neverlib-0.2.6.dist-info → neverlib-0.2.8.dist-info}/licenses/LICENSE +0 -0
- {neverlib-0.2.6.dist-info → neverlib-0.2.8.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,416 @@
|
|
|
1
|
+
{
|
|
2
|
+
"cells": [
|
|
3
|
+
{
|
|
4
|
+
"cell_type": "code",
|
|
5
|
+
"execution_count": null,
|
|
6
|
+
"id": "f27585eb",
|
|
7
|
+
"metadata": {},
|
|
8
|
+
"outputs": [
|
|
9
|
+
{
|
|
10
|
+
"ename": "ImportError",
|
|
11
|
+
"evalue": "cannot import name 'volume_aug_linmax' from 'audio_aug' (unknown location)",
|
|
12
|
+
"output_type": "error",
|
|
13
|
+
"traceback": [
|
|
14
|
+
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
|
|
15
|
+
"\u001b[31mImportError\u001b[39m Traceback (most recent call last)",
|
|
16
|
+
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[14]\u001b[39m\u001b[32m, line 5\u001b[39m\n\u001b[32m 2\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01msys\u001b[39;00m\n\u001b[32m 3\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mos\u001b[39;00m\n\u001b[32m----> \u001b[39m\u001b[32m5\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01maudio_aug\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m volume_aug_linmax, volume_aug_dbrms, volume_aug_lufs, measure_loudness\n\u001b[32m 6\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01msoundfile\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01msf\u001b[39;00m\n\u001b[32m 7\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mnumpy\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mnp\u001b[39;00m\n",
|
|
17
|
+
"\u001b[31mImportError\u001b[39m: cannot import name 'volume_aug_linmax' from 'audio_aug' (unknown location)"
|
|
18
|
+
]
|
|
19
|
+
},
|
|
20
|
+
{
|
|
21
|
+
"ename": "",
|
|
22
|
+
"evalue": "",
|
|
23
|
+
"output_type": "error",
|
|
24
|
+
"traceback": [
|
|
25
|
+
"\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n",
|
|
26
|
+
"\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n",
|
|
27
|
+
"\u001b[1;31mClick <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. \n",
|
|
28
|
+
"\u001b[1;31mView Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
|
|
29
|
+
]
|
|
30
|
+
}
|
|
31
|
+
],
|
|
32
|
+
"source": [
|
|
33
|
+
"# 确保使用本地版本而非安装版本\n",
|
|
34
|
+
"import sys\n",
|
|
35
|
+
"import os\n",
|
|
36
|
+
"\n",
|
|
37
|
+
"from audio_aug import volume_aug_linmax, volume_aug_dbrms, volume_aug_lufs, measure_loudness\n",
|
|
38
|
+
"import soundfile as sf\n",
|
|
39
|
+
"import numpy as np\n",
|
|
40
|
+
"import matplotlib.pyplot as plt\n",
|
|
41
|
+
"\n",
|
|
42
|
+
"# 导入可视化工具\n",
|
|
43
|
+
"from neverlib.data_analyze.visualization import AudioVisualizer\n",
|
|
44
|
+
"\n",
|
|
45
|
+
"wav_path = \"/data03/never/Dataset/kws_data/Command_Word/group_a_class/zh/pos_example/上一首/037/soft_normal_male_25_151_vadstart5920_vadend33600_snr14.2.wav\"\n",
|
|
46
|
+
"wav, sr = sf.read(wav_path, always_2d=True, dtype=\"float32\")\n",
|
|
47
|
+
"wav = wav[:, 1]\n",
|
|
48
|
+
"\n",
|
|
49
|
+
"# 创建可视化工具\n",
|
|
50
|
+
"visualizer = AudioVisualizer(sr=sr)\n",
|
|
51
|
+
"\n",
|
|
52
|
+
"# 测量原始音频的响度信息\n",
|
|
53
|
+
"original_loudness = measure_loudness(wav, sr)\n",
|
|
54
|
+
"print(\"原始音频响度信息:\")\n",
|
|
55
|
+
"print(f\"峰值: {original_loudness['peak_dbfs']:.2f} dBFS\")\n",
|
|
56
|
+
"print(f\"RMS: {original_loudness['rms_dbfs']:.2f} dBFS\")\n",
|
|
57
|
+
"print(f\"LUFS: {original_loudness['lufs']:.2f} LUFS\")\n",
|
|
58
|
+
"print(f\"峰均比: {original_loudness['crest_factor_db']:.2f} dB\")"
|
|
59
|
+
]
|
|
60
|
+
},
|
|
61
|
+
{
|
|
62
|
+
"cell_type": "markdown",
|
|
63
|
+
"id": "f86b2ba1",
|
|
64
|
+
"metadata": {},
|
|
65
|
+
"source": [
|
|
66
|
+
"## linear 音量增强"
|
|
67
|
+
]
|
|
68
|
+
},
|
|
69
|
+
{
|
|
70
|
+
"cell_type": "code",
|
|
71
|
+
"execution_count": null,
|
|
72
|
+
"id": "4585297c",
|
|
73
|
+
"metadata": {},
|
|
74
|
+
"outputs": [],
|
|
75
|
+
"source": [
|
|
76
|
+
"wav_linear = volume_aug_linmax(wav, 0.5)\n",
|
|
77
|
+
"\n",
|
|
78
|
+
"# 测量处理后的响度\n",
|
|
79
|
+
"linear_loudness = measure_loudness(wav_linear, sr)\n",
|
|
80
|
+
"\n",
|
|
81
|
+
"# 绘制时域和频域图\n",
|
|
82
|
+
"plt.figure(figsize=(16, 8))\n",
|
|
83
|
+
"\n",
|
|
84
|
+
"# 波形图\n",
|
|
85
|
+
"plt.subplot(2, 1, 1)\n",
|
|
86
|
+
"visualizer.plot_waveform(wav, \"原始波形\", ax=plt.gca())\n",
|
|
87
|
+
"plt.subplot(2, 1, 2)\n",
|
|
88
|
+
"visualizer.plot_waveform(wav_linear, f\"Linear增强波形 (目标幅度: 0.5)\", ax=plt.gca())\n",
|
|
89
|
+
"plt.tight_layout()\n",
|
|
90
|
+
"plt.show()\n",
|
|
91
|
+
"\n",
|
|
92
|
+
"# 频谱图\n",
|
|
93
|
+
"plt.figure(figsize=(16, 8))\n",
|
|
94
|
+
"plt.subplot(2, 1, 1)\n",
|
|
95
|
+
"visualizer.plot_spectrogram(wav, \"原始频谱图\", ax=plt.gca())\n",
|
|
96
|
+
"plt.subplot(2, 1, 2)\n",
|
|
97
|
+
"visualizer.plot_spectrogram(wav_linear, f\"Linear增强频谱图 (目标幅度: 0.5)\", ax=plt.gca())\n",
|
|
98
|
+
"plt.tight_layout()\n",
|
|
99
|
+
"plt.show()\n",
|
|
100
|
+
"\n",
|
|
101
|
+
"# 响度对比\n",
|
|
102
|
+
"plt.figure(figsize=(10, 6))\n",
|
|
103
|
+
"labels = ['原始音频', 'Linear增强']\n",
|
|
104
|
+
"peak_values = [original_loudness['peak_dbfs'], linear_loudness['peak_dbfs']]\n",
|
|
105
|
+
"rms_values = [original_loudness['rms_dbfs'], linear_loudness['rms_dbfs']]\n",
|
|
106
|
+
"lufs_values = [original_loudness['lufs'], linear_loudness['lufs']]\n",
|
|
107
|
+
"\n",
|
|
108
|
+
"x = np.arange(len(labels))\n",
|
|
109
|
+
"width = 0.25\n",
|
|
110
|
+
"\n",
|
|
111
|
+
"plt.bar(x - width, peak_values, width, label='峰值 (dBFS)')\n",
|
|
112
|
+
"plt.bar(x, rms_values, width, label='RMS (dBFS)')\n",
|
|
113
|
+
"plt.bar(x + width, lufs_values, width, label='LUFS')\n",
|
|
114
|
+
"\n",
|
|
115
|
+
"plt.ylabel('分贝')\n",
|
|
116
|
+
"plt.title('Linear音量增强前后响度对比')\n",
|
|
117
|
+
"plt.xticks(x, labels)\n",
|
|
118
|
+
"plt.legend()\n",
|
|
119
|
+
"plt.grid(True, alpha=0.3)\n",
|
|
120
|
+
"plt.tight_layout()\n",
|
|
121
|
+
"plt.show()\n",
|
|
122
|
+
"\n",
|
|
123
|
+
"print(f\"Linear增强后响度信息:\")\n",
|
|
124
|
+
"print(f\"峰值: {linear_loudness['peak_dbfs']:.2f} dBFS (变化: {linear_loudness['peak_dbfs'] - original_loudness['peak_dbfs']:.2f} dB)\")\n",
|
|
125
|
+
"print(f\"RMS: {linear_loudness['rms_dbfs']:.2f} dBFS (变化: {linear_loudness['rms_dbfs'] - original_loudness['rms_dbfs']:.2f} dB)\")\n",
|
|
126
|
+
"print(f\"LUFS: {linear_loudness['lufs']:.2f} LUFS (变化: {linear_loudness['lufs'] - original_loudness['lufs']:.2f} dB)\")\n",
|
|
127
|
+
"print(f\"峰均比: {linear_loudness['crest_factor_db']:.2f} dB\")"
|
|
128
|
+
]
|
|
129
|
+
},
|
|
130
|
+
{
|
|
131
|
+
"cell_type": "markdown",
|
|
132
|
+
"id": "56fb9004",
|
|
133
|
+
"metadata": {},
|
|
134
|
+
"source": [
|
|
135
|
+
"## dBrms 音量增强"
|
|
136
|
+
]
|
|
137
|
+
},
|
|
138
|
+
{
|
|
139
|
+
"cell_type": "code",
|
|
140
|
+
"execution_count": null,
|
|
141
|
+
"id": "7e88967d",
|
|
142
|
+
"metadata": {},
|
|
143
|
+
"outputs": [],
|
|
144
|
+
"source": [
|
|
145
|
+
"wav_dbrms = volume_aug_dbrms(wav, -6)\n",
|
|
146
|
+
"\n",
|
|
147
|
+
"# 测量处理后的响度\n",
|
|
148
|
+
"dbrms_loudness = measure_loudness(wav_dbrms, sr)\n",
|
|
149
|
+
"\n",
|
|
150
|
+
"# 绘制时域和频域图\n",
|
|
151
|
+
"plt.figure(figsize=(16, 8))\n",
|
|
152
|
+
"\n",
|
|
153
|
+
"# 波形图\n",
|
|
154
|
+
"plt.subplot(2, 1, 1)\n",
|
|
155
|
+
"visualizer.plot_waveform(wav, \"原始波形\", ax=plt.gca())\n",
|
|
156
|
+
"plt.subplot(2, 1, 2)\n",
|
|
157
|
+
"visualizer.plot_waveform(wav_dbrms, f\"dBrms增强波形 (目标电平: -6 dB)\", ax=plt.gca())\n",
|
|
158
|
+
"plt.tight_layout()\n",
|
|
159
|
+
"plt.show()\n",
|
|
160
|
+
"\n",
|
|
161
|
+
"# 频谱图\n",
|
|
162
|
+
"plt.figure(figsize=(16, 8))\n",
|
|
163
|
+
"plt.subplot(2, 1, 1)\n",
|
|
164
|
+
"visualizer.plot_spectrogram(wav, \"原始频谱图\", ax=plt.gca())\n",
|
|
165
|
+
"plt.subplot(2, 1, 2)\n",
|
|
166
|
+
"visualizer.plot_spectrogram(wav_dbrms, f\"dBrms增强频谱图 (目标电平: -6 dB)\", ax=plt.gca())\n",
|
|
167
|
+
"plt.tight_layout()\n",
|
|
168
|
+
"plt.show()\n",
|
|
169
|
+
"\n",
|
|
170
|
+
"# 响度对比\n",
|
|
171
|
+
"plt.figure(figsize=(10, 6))\n",
|
|
172
|
+
"labels = ['原始音频', 'dBrms增强']\n",
|
|
173
|
+
"peak_values = [original_loudness['peak_dbfs'], dbrms_loudness['peak_dbfs']]\n",
|
|
174
|
+
"rms_values = [original_loudness['rms_dbfs'], dbrms_loudness['rms_dbfs']]\n",
|
|
175
|
+
"lufs_values = [original_loudness['lufs'], dbrms_loudness['lufs']]\n",
|
|
176
|
+
"\n",
|
|
177
|
+
"x = np.arange(len(labels))\n",
|
|
178
|
+
"width = 0.25\n",
|
|
179
|
+
"\n",
|
|
180
|
+
"plt.bar(x - width, peak_values, width, label='峰值 (dBFS)')\n",
|
|
181
|
+
"plt.bar(x, rms_values, width, label='RMS (dBFS)')\n",
|
|
182
|
+
"plt.bar(x + width, lufs_values, width, label='LUFS')\n",
|
|
183
|
+
"\n",
|
|
184
|
+
"plt.ylabel('分贝')\n",
|
|
185
|
+
"plt.title('dBrms音量增强前后响度对比')\n",
|
|
186
|
+
"plt.xticks(x, labels)\n",
|
|
187
|
+
"plt.legend()\n",
|
|
188
|
+
"plt.grid(True, alpha=0.3)\n",
|
|
189
|
+
"plt.tight_layout()\n",
|
|
190
|
+
"plt.show()\n",
|
|
191
|
+
"\n",
|
|
192
|
+
"print(f\"dBrms增强后响度信息:\")\n",
|
|
193
|
+
"print(f\"峰值: {dbrms_loudness['peak_dbfs']:.2f} dBFS (变化: {dbrms_loudness['peak_dbfs'] - original_loudness['peak_dbfs']:.2f} dB)\")\n",
|
|
194
|
+
"print(f\"RMS: {dbrms_loudness['rms_dbfs']:.2f} dBFS (变化: {dbrms_loudness['rms_dbfs'] - original_loudness['rms_dbfs']:.2f} dB)\")\n",
|
|
195
|
+
"print(f\"LUFS: {dbrms_loudness['lufs']:.2f} LUFS (变化: {dbrms_loudness['lufs'] - original_loudness['lufs']:.2f} dB)\")\n",
|
|
196
|
+
"print(f\"峰均比: {dbrms_loudness['crest_factor_db']:.2f} dB\")"
|
|
197
|
+
]
|
|
198
|
+
},
|
|
199
|
+
{
|
|
200
|
+
"cell_type": "markdown",
|
|
201
|
+
"id": "e6a9695f",
|
|
202
|
+
"metadata": {},
|
|
203
|
+
"source": [
|
|
204
|
+
"## LUFS 音量增强"
|
|
205
|
+
]
|
|
206
|
+
},
|
|
207
|
+
{
|
|
208
|
+
"cell_type": "code",
|
|
209
|
+
"execution_count": null,
|
|
210
|
+
"id": "30b21884",
|
|
211
|
+
"metadata": {},
|
|
212
|
+
"outputs": [],
|
|
213
|
+
"source": [
|
|
214
|
+
"wav_lufs = volume_aug_lufs(wav, -16)\n",
|
|
215
|
+
"\n",
|
|
216
|
+
"# 测量处理后的响度\n",
|
|
217
|
+
"lufs_loudness = measure_loudness(wav_lufs, sr)\n",
|
|
218
|
+
"\n",
|
|
219
|
+
"# 绘制时域和频域图\n",
|
|
220
|
+
"plt.figure(figsize=(16, 8))\n",
|
|
221
|
+
"\n",
|
|
222
|
+
"# 波形图\n",
|
|
223
|
+
"plt.subplot(2, 1, 1)\n",
|
|
224
|
+
"visualizer.plot_waveform(wav, \"原始波形\", ax=plt.gca())\n",
|
|
225
|
+
"plt.subplot(2, 1, 2)\n",
|
|
226
|
+
"visualizer.plot_waveform(wav_lufs, f\"LUFS增强波形 (目标响度: -16 LUFS)\", ax=plt.gca())\n",
|
|
227
|
+
"plt.tight_layout()\n",
|
|
228
|
+
"plt.show()\n",
|
|
229
|
+
"\n",
|
|
230
|
+
"# 频谱图\n",
|
|
231
|
+
"plt.figure(figsize=(16, 8))\n",
|
|
232
|
+
"plt.subplot(2, 1, 1)\n",
|
|
233
|
+
"visualizer.plot_spectrogram(wav, \"原始频谱图\", ax=plt.gca())\n",
|
|
234
|
+
"plt.subplot(2, 1, 2)\n",
|
|
235
|
+
"visualizer.plot_spectrogram(wav_lufs, f\"LUFS增强频谱图 (目标响度: -16 LUFS)\", ax=plt.gca())\n",
|
|
236
|
+
"plt.tight_layout()\n",
|
|
237
|
+
"plt.show()\n",
|
|
238
|
+
"\n",
|
|
239
|
+
"# 响度对比\n",
|
|
240
|
+
"plt.figure(figsize=(10, 6))\n",
|
|
241
|
+
"labels = ['原始音频', 'LUFS增强']\n",
|
|
242
|
+
"peak_values = [original_loudness['peak_dbfs'], lufs_loudness['peak_dbfs']]\n",
|
|
243
|
+
"rms_values = [original_loudness['rms_dbfs'], lufs_loudness['rms_dbfs']]\n",
|
|
244
|
+
"lufs_values = [original_loudness['lufs'], lufs_loudness['lufs']]\n",
|
|
245
|
+
"\n",
|
|
246
|
+
"x = np.arange(len(labels))\n",
|
|
247
|
+
"width = 0.25\n",
|
|
248
|
+
"\n",
|
|
249
|
+
"plt.bar(x - width, peak_values, width, label='峰值 (dBFS)')\n",
|
|
250
|
+
"plt.bar(x, rms_values, width, label='RMS (dBFS)')\n",
|
|
251
|
+
"plt.bar(x + width, lufs_values, width, label='LUFS')\n",
|
|
252
|
+
"\n",
|
|
253
|
+
"plt.ylabel('分贝')\n",
|
|
254
|
+
"plt.title('LUFS音量增强前后响度对比')\n",
|
|
255
|
+
"plt.xticks(x, labels)\n",
|
|
256
|
+
"plt.legend()\n",
|
|
257
|
+
"plt.grid(True, alpha=0.3)\n",
|
|
258
|
+
"plt.tight_layout()\n",
|
|
259
|
+
"plt.show()\n",
|
|
260
|
+
"\n",
|
|
261
|
+
"print(f\"LUFS增强后响度信息:\")\n",
|
|
262
|
+
"print(f\"峰值: {lufs_loudness['peak_dbfs']:.2f} dBFS (变化: {lufs_loudness['peak_dbfs'] - original_loudness['peak_dbfs']:.2f} dB)\")\n",
|
|
263
|
+
"print(f\"RMS: {lufs_loudness['rms_dbfs']:.2f} dBFS (变化: {lufs_loudness['rms_dbfs'] - original_loudness['rms_dbfs']:.2f} dB)\")\n",
|
|
264
|
+
"print(f\"LUFS: {lufs_loudness['lufs']:.2f} LUFS (变化: {lufs_loudness['lufs'] - original_loudness['lufs']:.2f} dB)\")\n",
|
|
265
|
+
"print(f\"峰均比: {lufs_loudness['crest_factor_db']:.2f} dB\")"
|
|
266
|
+
]
|
|
267
|
+
},
|
|
268
|
+
{
|
|
269
|
+
"cell_type": "code",
|
|
270
|
+
"execution_count": null,
|
|
271
|
+
"id": "k6vath03xyg",
|
|
272
|
+
"metadata": {},
|
|
273
|
+
"outputs": [],
|
|
274
|
+
"source": [
|
|
275
|
+
"# 创建三种增强方法的综合对比图\n",
|
|
276
|
+
"plt.figure(figsize=(16, 12))\n",
|
|
277
|
+
"\n",
|
|
278
|
+
"# 波形对比\n",
|
|
279
|
+
"plt.subplot(3, 1, 1)\n",
|
|
280
|
+
"time = np.linspace(0, len(wav) / sr, len(wav))\n",
|
|
281
|
+
"plt.plot(time, wav, label='原始波形', alpha=0.7)\n",
|
|
282
|
+
"plt.plot(time, wav_linear, label='Linear增强', alpha=0.7)\n",
|
|
283
|
+
"plt.plot(time, wav_dbrms, label='dBrms增强', alpha=0.7)\n",
|
|
284
|
+
"plt.plot(time, wav_lufs, label='LUFS增强', alpha=0.7)\n",
|
|
285
|
+
"plt.title('三种音量增强方法波形对比')\n",
|
|
286
|
+
"plt.xlabel('时间 (s)')\n",
|
|
287
|
+
"plt.ylabel('幅度')\n",
|
|
288
|
+
"plt.grid(True, alpha=0.3)\n",
|
|
289
|
+
"plt.legend()\n",
|
|
290
|
+
"\n",
|
|
291
|
+
"# 响度对比 - 条形图\n",
|
|
292
|
+
"plt.subplot(3, 1, 2)\n",
|
|
293
|
+
"labels = ['原始音频', 'Linear增强', 'dBrms增强', 'LUFS增强']\n",
|
|
294
|
+
"peak_values = [original_loudness['peak_dbfs'], linear_loudness['peak_dbfs'], \n",
|
|
295
|
+
" dbrms_loudness['peak_dbfs'], lufs_loudness['peak_dbfs']]\n",
|
|
296
|
+
"rms_values = [original_loudness['rms_dbfs'], linear_loudness['rms_dbfs'], \n",
|
|
297
|
+
" dbrms_loudness['rms_dbfs'], lufs_loudness['rms_dbfs']]\n",
|
|
298
|
+
"lufs_values = [original_loudness['lufs'], linear_loudness['lufs'], \n",
|
|
299
|
+
" dbrms_loudness['lufs'], lufs_loudness['lufs']]\n",
|
|
300
|
+
"\n",
|
|
301
|
+
"x = np.arange(len(labels))\n",
|
|
302
|
+
"width = 0.25\n",
|
|
303
|
+
"\n",
|
|
304
|
+
"plt.bar(x - width, peak_values, width, label='峰值 (dBFS)')\n",
|
|
305
|
+
"plt.bar(x, rms_values, width, label='RMS (dBFS)')\n",
|
|
306
|
+
"plt.bar(x + width, lufs_values, width, label='LUFS')\n",
|
|
307
|
+
"\n",
|
|
308
|
+
"plt.ylabel('分贝')\n",
|
|
309
|
+
"plt.title('三种音量增强方法响度对比')\n",
|
|
310
|
+
"plt.xticks(x, labels)\n",
|
|
311
|
+
"plt.legend()\n",
|
|
312
|
+
"plt.grid(True, alpha=0.3)\n",
|
|
313
|
+
"\n",
|
|
314
|
+
"# 峰均比对比\n",
|
|
315
|
+
"plt.subplot(3, 1, 3)\n",
|
|
316
|
+
"crest_values = [original_loudness['crest_factor_db'], linear_loudness['crest_factor_db'],\n",
|
|
317
|
+
" dbrms_loudness['crest_factor_db'], lufs_loudness['crest_factor_db']]\n",
|
|
318
|
+
"\n",
|
|
319
|
+
"plt.bar(x, crest_values, width=0.5, color='purple', alpha=0.7)\n",
|
|
320
|
+
"plt.ylabel('分贝')\n",
|
|
321
|
+
"plt.title('三种音量增强方法峰均比对比')\n",
|
|
322
|
+
"plt.xticks(x, labels)\n",
|
|
323
|
+
"plt.grid(True, alpha=0.3)\n",
|
|
324
|
+
"\n",
|
|
325
|
+
"plt.tight_layout()\n",
|
|
326
|
+
"plt.show()\n",
|
|
327
|
+
"\n",
|
|
328
|
+
"# 创建综合对比表格\n",
|
|
329
|
+
"print(\"\\n三种音量增强方法综合对比:\")\n",
|
|
330
|
+
"print(\"=\" * 80)\n",
|
|
331
|
+
"print(f\"{'方法':<12}{'峰值 (dBFS)':<20}{'RMS (dBFS)':<20}{'LUFS':<20}{'峰均比 (dB)':<15}\")\n",
|
|
332
|
+
"print(\"-\" * 80)\n",
|
|
333
|
+
"print(f\"{'原始音频':<12}{original_loudness['peak_dbfs']:<20.2f}{original_loudness['rms_dbfs']:<20.2f}{original_loudness['lufs']:<20.2f}{original_loudness['crest_factor_db']:<15.2f}\")\n",
|
|
334
|
+
"print(f\"{'Linear增强':<12}{linear_loudness['peak_dbfs']:<20.2f}{linear_loudness['rms_dbfs']:<20.2f}{linear_loudness['lufs']:<20.2f}{linear_loudness['crest_factor_db']:<15.2f}\")\n",
|
|
335
|
+
"print(f\"{'dBrms增强':<12}{dbrms_loudness['peak_dbfs']:<20.2f}{dbrms_loudness['rms_dbfs']:<20.2f}{dbrms_loudness['lufs']:<20.2f}{dbrms_loudness['crest_factor_db']:<15.2f}\")\n",
|
|
336
|
+
"print(f\"{'LUFS增强':<12}{lufs_loudness['peak_dbfs']:<20.2f}{lufs_loudness['rms_dbfs']:<20.2f}{lufs_loudness['lufs']:<20.2f}{lufs_loudness['crest_factor_db']:<15.2f}\")\n",
|
|
337
|
+
"print(\"=\" * 80)\n",
|
|
338
|
+
"\n",
|
|
339
|
+
"# 各方法响度变化量\n",
|
|
340
|
+
"print(\"\\n响度变化量 (相对于原始音频):\")\n",
|
|
341
|
+
"print(\"=\" * 80)\n",
|
|
342
|
+
"print(f\"{'方法':<12}{'峰值变化 (dB)':<20}{'RMS变化 (dB)':<20}{'LUFS变化 (dB)':<20}\")\n",
|
|
343
|
+
"print(\"-\" * 80)\n",
|
|
344
|
+
"print(f\"{'Linear增强':<12}{linear_loudness['peak_dbfs'] - original_loudness['peak_dbfs']:<20.2f}{linear_loudness['rms_dbfs'] - original_loudness['rms_dbfs']:<20.2f}{linear_loudness['lufs'] - original_loudness['lufs']:<20.2f}\")\n",
|
|
345
|
+
"print(f\"{'dBrms增强':<12}{dbrms_loudness['peak_dbfs'] - original_loudness['peak_dbfs']:<20.2f}{dbrms_loudness['rms_dbfs'] - original_loudness['rms_dbfs']:<20.2f}{dbrms_loudness['lufs'] - original_loudness['lufs']:<20.2f}\")\n",
|
|
346
|
+
"print(f\"{'LUFS增强':<12}{lufs_loudness['peak_dbfs'] - original_loudness['peak_dbfs']:<20.2f}{lufs_loudness['rms_dbfs'] - original_loudness['rms_dbfs']:<20.2f}{lufs_loudness['lufs'] - original_loudness['lufs']:<20.2f}\")\n",
|
|
347
|
+
"print(\"=\" * 80)\n",
|
|
348
|
+
"\n",
|
|
349
|
+
"# 总结分析\n",
|
|
350
|
+
"print(\"\\n音量增强方法分析总结:\")\n",
|
|
351
|
+
"print(\"=\" * 80)\n",
|
|
352
|
+
"print(\"1. Linear增强 (volume_aug_linmax):\")\n",
|
|
353
|
+
"print(\" - 基于峰值的线性增益,目标为最大峰值为指定的线性值\")\n",
|
|
354
|
+
"print(\" - 特点:保持动态范围不变,整体增强或减弱\")\n",
|
|
355
|
+
"print(\" - 适用场景:需要精确控制峰值而不改变音频动态特性的场合\")\n",
|
|
356
|
+
"print(\"\\n2. dBrms增强 (volume_aug_dbrms):\")\n",
|
|
357
|
+
"print(\" - 基于RMS电平的增益,目标为指定的dB电平\")\n",
|
|
358
|
+
"print(\" - 特点:以能量均值为基准,更接近人耳感知\")\n",
|
|
359
|
+
"print(\" - 适用场景:需要统一音频能量电平的场合\")\n",
|
|
360
|
+
"print(\"\\n3. LUFS增强 (volume_aug_lufs):\")\n",
|
|
361
|
+
"print(\" - 基于国际响度标准的增益,符合广播标准\")\n",
|
|
362
|
+
"print(\" - 特点:考虑人耳频率加权,最接近人耳响度感知\")\n",
|
|
363
|
+
"print(\" - 适用场景:广播、流媒体、专业音频处理等需要符合响度标准的场合\")\n",
|
|
364
|
+
"print(\"=\" * 80)"
|
|
365
|
+
]
|
|
366
|
+
},
|
|
367
|
+
{
|
|
368
|
+
"cell_type": "markdown",
|
|
369
|
+
"id": "2fb931jxgtr",
|
|
370
|
+
"metadata": {},
|
|
371
|
+
"source": [
|
|
372
|
+
"# 音量增强方法综合分析\n",
|
|
373
|
+
"\n",
|
|
374
|
+
"本笔记本演示并分析了neverlib库中三种音量增强方法的特性和效果:\n",
|
|
375
|
+
"\n",
|
|
376
|
+
"1. **Linear增强 (volume_aug_linmax)**\n",
|
|
377
|
+
" - 基于峰值的线性增益\n",
|
|
378
|
+
" - 将音频最大峰值调整为目标线性值(0-1范围)\n",
|
|
379
|
+
" - 保持动态范围不变\n",
|
|
380
|
+
"\n",
|
|
381
|
+
"2. **dBrms增强 (volume_aug_dbrms)**\n",
|
|
382
|
+
" - 基于均方根(RMS)功率的增益\n",
|
|
383
|
+
" - 将音频RMS电平调整为目标分贝值\n",
|
|
384
|
+
" - 更接近能量感知\n",
|
|
385
|
+
"\n",
|
|
386
|
+
"3. **LUFS增强 (volume_aug_lufs)**\n",
|
|
387
|
+
" - 基于感知响度单位(LUFS)的增益\n",
|
|
388
|
+
" - 符合广播标准的响度归一化\n",
|
|
389
|
+
" - 考虑人耳感知加权,最接近人耳响度感知\n",
|
|
390
|
+
"\n",
|
|
391
|
+
"各方法适用于不同场景,可根据需求选择合适的音量增强方式。"
|
|
392
|
+
]
|
|
393
|
+
}
|
|
394
|
+
],
|
|
395
|
+
"metadata": {
|
|
396
|
+
"kernelspec": {
|
|
397
|
+
"display_name": "py311torch211",
|
|
398
|
+
"language": "python",
|
|
399
|
+
"name": "python3"
|
|
400
|
+
},
|
|
401
|
+
"language_info": {
|
|
402
|
+
"codemirror_mode": {
|
|
403
|
+
"name": "ipython",
|
|
404
|
+
"version": 3
|
|
405
|
+
},
|
|
406
|
+
"file_extension": ".py",
|
|
407
|
+
"mimetype": "text/x-python",
|
|
408
|
+
"name": "python",
|
|
409
|
+
"nbconvert_exporter": "python",
|
|
410
|
+
"pygments_lexer": "ipython3",
|
|
411
|
+
"version": "3.11.9"
|
|
412
|
+
}
|
|
413
|
+
},
|
|
414
|
+
"nbformat": 4,
|
|
415
|
+
"nbformat_minor": 5
|
|
416
|
+
}
|