neverlib 0.2.6__py3-none-any.whl → 0.2.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. neverlib/.claude/settings.local.json +9 -0
  2. neverlib/Docs/audio_aug/test_volume.ipynb +416 -0
  3. neverlib/Docs/audio_aug_test/test_volume.ipynb +289 -0
  4. neverlib/Docs/filter/biquad.ipynb +129 -0
  5. neverlib/Docs/filter/filter_family.ipynb +450 -0
  6. neverlib/Docs/filter/highpass.ipynb +139 -0
  7. neverlib/Docs/filter/scipy_filter_family.ipynb +110 -0
  8. neverlib/Docs/vad/VAD_Energy.ipynb +167 -0
  9. neverlib/Docs/vad/VAD_Silero.ipynb +325 -0
  10. neverlib/Docs/vad/VAD_WebRTC.ipynb +189 -0
  11. neverlib/Docs/vad/VAD_funasr.ipynb +192 -0
  12. neverlib/Docs/vad/VAD_rvADfast.ipynb +162 -0
  13. neverlib/Docs/vad/VAD_statistics.ipynb +532 -0
  14. neverlib/Docs/vad/VAD_tenVAD.ipynb +292 -0
  15. neverlib/Docs/vad/VAD_vadlib.ipynb +168 -0
  16. neverlib/Docs/vad/VAD_whisper.ipynb +404 -0
  17. neverlib/QA/gen_init.py +117 -0
  18. neverlib/QA/get_fun.py +19 -0
  19. neverlib/__init__.py +21 -4
  20. neverlib/audio_aug/HarmonicDistortion.py +19 -13
  21. neverlib/audio_aug/__init__.py +30 -12
  22. neverlib/audio_aug/audio_aug.py +19 -14
  23. neverlib/audio_aug/clip_aug.py +15 -18
  24. neverlib/audio_aug/coder_aug.py +44 -24
  25. neverlib/audio_aug/coder_aug2.py +54 -37
  26. neverlib/audio_aug/loss_packet_aug.py +7 -7
  27. neverlib/audio_aug/quant_aug.py +19 -17
  28. neverlib/data/000_short_enhance.wav +0 -0
  29. neverlib/data/3956_speech.wav +0 -0
  30. neverlib/data/3956_sweep.wav +0 -0
  31. neverlib/data/vad_example.wav +0 -0
  32. neverlib/data/white.wav +0 -0
  33. neverlib/data/white_EQ.wav +0 -0
  34. neverlib/data/white_matched.wav +0 -0
  35. neverlib/data_analyze/__init__.py +25 -20
  36. neverlib/data_analyze/dataset_analyzer.py +109 -114
  37. neverlib/data_analyze/quality_metrics.py +87 -89
  38. neverlib/data_analyze/rms_distrubution.py +23 -42
  39. neverlib/data_analyze/spectral_analysis.py +43 -46
  40. neverlib/data_analyze/statistics.py +76 -76
  41. neverlib/data_analyze/temporal_features.py +15 -6
  42. neverlib/data_analyze/visualization.py +208 -144
  43. neverlib/filter/__init__.py +17 -20
  44. neverlib/filter/auto_eq/__init__.py +18 -35
  45. neverlib/filter/auto_eq/de_eq.py +0 -2
  46. neverlib/filter/common.py +24 -5
  47. neverlib/metrics/DNSMOS/bak_ovr.onnx +0 -0
  48. neverlib/metrics/DNSMOS/model_v8.onnx +0 -0
  49. neverlib/metrics/DNSMOS/sig.onnx +0 -0
  50. neverlib/metrics/DNSMOS/sig_bak_ovr.onnx +0 -0
  51. neverlib/metrics/__init__.py +23 -0
  52. neverlib/metrics/dnsmos.py +4 -15
  53. neverlib/metrics/pDNSMOS/sig_bak_ovr.onnx +0 -0
  54. neverlib/metrics/pesq_c/PESQ +0 -0
  55. neverlib/metrics/pesq_c/dsp.c +553 -0
  56. neverlib/metrics/pesq_c/dsp.h +138 -0
  57. neverlib/metrics/pesq_c/pesq.h +294 -0
  58. neverlib/metrics/pesq_c/pesqdsp.c +1047 -0
  59. neverlib/metrics/pesq_c/pesqio.c +392 -0
  60. neverlib/metrics/pesq_c/pesqmain.c +610 -0
  61. neverlib/metrics/pesq_c/pesqmod.c +1417 -0
  62. neverlib/metrics/pesq_c/pesqpar.h +297 -0
  63. neverlib/metrics/snr.py +5 -1
  64. neverlib/metrics/spec.py +31 -21
  65. neverlib/metrics/test_pesq.py +0 -4
  66. neverlib/tests/test_imports.py +17 -0
  67. neverlib/utils/__init__.py +26 -15
  68. neverlib/utils/audio_split.py +5 -1
  69. neverlib/utils/checkGPU.py +17 -9
  70. neverlib/utils/lazy_expose.py +29 -0
  71. neverlib/utils/utils.py +40 -12
  72. neverlib/vad/__init__.py +33 -25
  73. neverlib/vad/class_get_speech.py +1 -1
  74. neverlib/vad/class_vad.py +3 -3
  75. neverlib/vad/img.png +0 -0
  76. {neverlib-0.2.6.dist-info → neverlib-0.2.7.dist-info}/METADATA +1 -1
  77. {neverlib-0.2.6.dist-info → neverlib-0.2.7.dist-info}/RECORD +80 -37
  78. {neverlib-0.2.6.dist-info → neverlib-0.2.7.dist-info}/WHEEL +0 -0
  79. {neverlib-0.2.6.dist-info → neverlib-0.2.7.dist-info}/licenses/LICENSE +0 -0
  80. {neverlib-0.2.6.dist-info → neverlib-0.2.7.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,9 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Read(/data03/never/Desktop/neverlib/**)"
5
+ ],
6
+ "deny": [],
7
+ "ask": []
8
+ }
9
+ }
@@ -0,0 +1,416 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "f27585eb",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "ename": "ImportError",
11
+ "evalue": "cannot import name 'volume_aug_linmax' from 'audio_aug' (unknown location)",
12
+ "output_type": "error",
13
+ "traceback": [
14
+ "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
15
+ "\u001b[31mImportError\u001b[39m Traceback (most recent call last)",
16
+ "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[14]\u001b[39m\u001b[32m, line 5\u001b[39m\n\u001b[32m 2\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01msys\u001b[39;00m\n\u001b[32m 3\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mos\u001b[39;00m\n\u001b[32m----> \u001b[39m\u001b[32m5\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01maudio_aug\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m volume_aug_linmax, volume_aug_dbrms, volume_aug_lufs, measure_loudness\n\u001b[32m 6\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01msoundfile\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01msf\u001b[39;00m\n\u001b[32m 7\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mnumpy\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mnp\u001b[39;00m\n",
17
+ "\u001b[31mImportError\u001b[39m: cannot import name 'volume_aug_linmax' from 'audio_aug' (unknown location)"
18
+ ]
19
+ },
20
+ {
21
+ "ename": "",
22
+ "evalue": "",
23
+ "output_type": "error",
24
+ "traceback": [
25
+ "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n",
26
+ "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n",
27
+ "\u001b[1;31mClick <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. \n",
28
+ "\u001b[1;31mView Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
29
+ ]
30
+ }
31
+ ],
32
+ "source": [
33
+ "# 确保使用本地版本而非安装版本\n",
34
+ "import sys\n",
35
+ "import os\n",
36
+ "\n",
37
+ "from audio_aug import volume_aug_linmax, volume_aug_dbrms, volume_aug_lufs, measure_loudness\n",
38
+ "import soundfile as sf\n",
39
+ "import numpy as np\n",
40
+ "import matplotlib.pyplot as plt\n",
41
+ "\n",
42
+ "# 导入可视化工具\n",
43
+ "from neverlib.data_analyze.visualization import AudioVisualizer\n",
44
+ "\n",
45
+ "wav_path = \"/data03/never/Dataset/kws_data/Command_Word/group_a_class/zh/pos_example/上一首/037/soft_normal_male_25_151_vadstart5920_vadend33600_snr14.2.wav\"\n",
46
+ "wav, sr = sf.read(wav_path, always_2d=True, dtype=\"float32\")\n",
47
+ "wav = wav[:, 1]\n",
48
+ "\n",
49
+ "# 创建可视化工具\n",
50
+ "visualizer = AudioVisualizer(sr=sr)\n",
51
+ "\n",
52
+ "# 测量原始音频的响度信息\n",
53
+ "original_loudness = measure_loudness(wav, sr)\n",
54
+ "print(\"原始音频响度信息:\")\n",
55
+ "print(f\"峰值: {original_loudness['peak_dbfs']:.2f} dBFS\")\n",
56
+ "print(f\"RMS: {original_loudness['rms_dbfs']:.2f} dBFS\")\n",
57
+ "print(f\"LUFS: {original_loudness['lufs']:.2f} LUFS\")\n",
58
+ "print(f\"峰均比: {original_loudness['crest_factor_db']:.2f} dB\")"
59
+ ]
60
+ },
61
+ {
62
+ "cell_type": "markdown",
63
+ "id": "f86b2ba1",
64
+ "metadata": {},
65
+ "source": [
66
+ "## linear 音量增强"
67
+ ]
68
+ },
69
+ {
70
+ "cell_type": "code",
71
+ "execution_count": null,
72
+ "id": "4585297c",
73
+ "metadata": {},
74
+ "outputs": [],
75
+ "source": [
76
+ "wav_linear = volume_aug_linmax(wav, 0.5)\n",
77
+ "\n",
78
+ "# 测量处理后的响度\n",
79
+ "linear_loudness = measure_loudness(wav_linear, sr)\n",
80
+ "\n",
81
+ "# 绘制时域和频域图\n",
82
+ "plt.figure(figsize=(16, 8))\n",
83
+ "\n",
84
+ "# 波形图\n",
85
+ "plt.subplot(2, 1, 1)\n",
86
+ "visualizer.plot_waveform(wav, \"原始波形\", ax=plt.gca())\n",
87
+ "plt.subplot(2, 1, 2)\n",
88
+ "visualizer.plot_waveform(wav_linear, f\"Linear增强波形 (目标幅度: 0.5)\", ax=plt.gca())\n",
89
+ "plt.tight_layout()\n",
90
+ "plt.show()\n",
91
+ "\n",
92
+ "# 频谱图\n",
93
+ "plt.figure(figsize=(16, 8))\n",
94
+ "plt.subplot(2, 1, 1)\n",
95
+ "visualizer.plot_spectrogram(wav, \"原始频谱图\", ax=plt.gca())\n",
96
+ "plt.subplot(2, 1, 2)\n",
97
+ "visualizer.plot_spectrogram(wav_linear, f\"Linear增强频谱图 (目标幅度: 0.5)\", ax=plt.gca())\n",
98
+ "plt.tight_layout()\n",
99
+ "plt.show()\n",
100
+ "\n",
101
+ "# 响度对比\n",
102
+ "plt.figure(figsize=(10, 6))\n",
103
+ "labels = ['原始音频', 'Linear增强']\n",
104
+ "peak_values = [original_loudness['peak_dbfs'], linear_loudness['peak_dbfs']]\n",
105
+ "rms_values = [original_loudness['rms_dbfs'], linear_loudness['rms_dbfs']]\n",
106
+ "lufs_values = [original_loudness['lufs'], linear_loudness['lufs']]\n",
107
+ "\n",
108
+ "x = np.arange(len(labels))\n",
109
+ "width = 0.25\n",
110
+ "\n",
111
+ "plt.bar(x - width, peak_values, width, label='峰值 (dBFS)')\n",
112
+ "plt.bar(x, rms_values, width, label='RMS (dBFS)')\n",
113
+ "plt.bar(x + width, lufs_values, width, label='LUFS')\n",
114
+ "\n",
115
+ "plt.ylabel('分贝')\n",
116
+ "plt.title('Linear音量增强前后响度对比')\n",
117
+ "plt.xticks(x, labels)\n",
118
+ "plt.legend()\n",
119
+ "plt.grid(True, alpha=0.3)\n",
120
+ "plt.tight_layout()\n",
121
+ "plt.show()\n",
122
+ "\n",
123
+ "print(f\"Linear增强后响度信息:\")\n",
124
+ "print(f\"峰值: {linear_loudness['peak_dbfs']:.2f} dBFS (变化: {linear_loudness['peak_dbfs'] - original_loudness['peak_dbfs']:.2f} dB)\")\n",
125
+ "print(f\"RMS: {linear_loudness['rms_dbfs']:.2f} dBFS (变化: {linear_loudness['rms_dbfs'] - original_loudness['rms_dbfs']:.2f} dB)\")\n",
126
+ "print(f\"LUFS: {linear_loudness['lufs']:.2f} LUFS (变化: {linear_loudness['lufs'] - original_loudness['lufs']:.2f} dB)\")\n",
127
+ "print(f\"峰均比: {linear_loudness['crest_factor_db']:.2f} dB\")"
128
+ ]
129
+ },
130
+ {
131
+ "cell_type": "markdown",
132
+ "id": "56fb9004",
133
+ "metadata": {},
134
+ "source": [
135
+ "## dBrms 音量增强"
136
+ ]
137
+ },
138
+ {
139
+ "cell_type": "code",
140
+ "execution_count": null,
141
+ "id": "7e88967d",
142
+ "metadata": {},
143
+ "outputs": [],
144
+ "source": [
145
+ "wav_dbrms = volume_aug_dbrms(wav, -6)\n",
146
+ "\n",
147
+ "# 测量处理后的响度\n",
148
+ "dbrms_loudness = measure_loudness(wav_dbrms, sr)\n",
149
+ "\n",
150
+ "# 绘制时域和频域图\n",
151
+ "plt.figure(figsize=(16, 8))\n",
152
+ "\n",
153
+ "# 波形图\n",
154
+ "plt.subplot(2, 1, 1)\n",
155
+ "visualizer.plot_waveform(wav, \"原始波形\", ax=plt.gca())\n",
156
+ "plt.subplot(2, 1, 2)\n",
157
+ "visualizer.plot_waveform(wav_dbrms, f\"dBrms增强波形 (目标电平: -6 dB)\", ax=plt.gca())\n",
158
+ "plt.tight_layout()\n",
159
+ "plt.show()\n",
160
+ "\n",
161
+ "# 频谱图\n",
162
+ "plt.figure(figsize=(16, 8))\n",
163
+ "plt.subplot(2, 1, 1)\n",
164
+ "visualizer.plot_spectrogram(wav, \"原始频谱图\", ax=plt.gca())\n",
165
+ "plt.subplot(2, 1, 2)\n",
166
+ "visualizer.plot_spectrogram(wav_dbrms, f\"dBrms增强频谱图 (目标电平: -6 dB)\", ax=plt.gca())\n",
167
+ "plt.tight_layout()\n",
168
+ "plt.show()\n",
169
+ "\n",
170
+ "# 响度对比\n",
171
+ "plt.figure(figsize=(10, 6))\n",
172
+ "labels = ['原始音频', 'dBrms增强']\n",
173
+ "peak_values = [original_loudness['peak_dbfs'], dbrms_loudness['peak_dbfs']]\n",
174
+ "rms_values = [original_loudness['rms_dbfs'], dbrms_loudness['rms_dbfs']]\n",
175
+ "lufs_values = [original_loudness['lufs'], dbrms_loudness['lufs']]\n",
176
+ "\n",
177
+ "x = np.arange(len(labels))\n",
178
+ "width = 0.25\n",
179
+ "\n",
180
+ "plt.bar(x - width, peak_values, width, label='峰值 (dBFS)')\n",
181
+ "plt.bar(x, rms_values, width, label='RMS (dBFS)')\n",
182
+ "plt.bar(x + width, lufs_values, width, label='LUFS')\n",
183
+ "\n",
184
+ "plt.ylabel('分贝')\n",
185
+ "plt.title('dBrms音量增强前后响度对比')\n",
186
+ "plt.xticks(x, labels)\n",
187
+ "plt.legend()\n",
188
+ "plt.grid(True, alpha=0.3)\n",
189
+ "plt.tight_layout()\n",
190
+ "plt.show()\n",
191
+ "\n",
192
+ "print(f\"dBrms增强后响度信息:\")\n",
193
+ "print(f\"峰值: {dbrms_loudness['peak_dbfs']:.2f} dBFS (变化: {dbrms_loudness['peak_dbfs'] - original_loudness['peak_dbfs']:.2f} dB)\")\n",
194
+ "print(f\"RMS: {dbrms_loudness['rms_dbfs']:.2f} dBFS (变化: {dbrms_loudness['rms_dbfs'] - original_loudness['rms_dbfs']:.2f} dB)\")\n",
195
+ "print(f\"LUFS: {dbrms_loudness['lufs']:.2f} LUFS (变化: {dbrms_loudness['lufs'] - original_loudness['lufs']:.2f} dB)\")\n",
196
+ "print(f\"峰均比: {dbrms_loudness['crest_factor_db']:.2f} dB\")"
197
+ ]
198
+ },
199
+ {
200
+ "cell_type": "markdown",
201
+ "id": "e6a9695f",
202
+ "metadata": {},
203
+ "source": [
204
+ "## LUFS 音量增强"
205
+ ]
206
+ },
207
+ {
208
+ "cell_type": "code",
209
+ "execution_count": null,
210
+ "id": "30b21884",
211
+ "metadata": {},
212
+ "outputs": [],
213
+ "source": [
214
+ "wav_lufs = volume_aug_lufs(wav, -16)\n",
215
+ "\n",
216
+ "# 测量处理后的响度\n",
217
+ "lufs_loudness = measure_loudness(wav_lufs, sr)\n",
218
+ "\n",
219
+ "# 绘制时域和频域图\n",
220
+ "plt.figure(figsize=(16, 8))\n",
221
+ "\n",
222
+ "# 波形图\n",
223
+ "plt.subplot(2, 1, 1)\n",
224
+ "visualizer.plot_waveform(wav, \"原始波形\", ax=plt.gca())\n",
225
+ "plt.subplot(2, 1, 2)\n",
226
+ "visualizer.plot_waveform(wav_lufs, f\"LUFS增强波形 (目标响度: -16 LUFS)\", ax=plt.gca())\n",
227
+ "plt.tight_layout()\n",
228
+ "plt.show()\n",
229
+ "\n",
230
+ "# 频谱图\n",
231
+ "plt.figure(figsize=(16, 8))\n",
232
+ "plt.subplot(2, 1, 1)\n",
233
+ "visualizer.plot_spectrogram(wav, \"原始频谱图\", ax=plt.gca())\n",
234
+ "plt.subplot(2, 1, 2)\n",
235
+ "visualizer.plot_spectrogram(wav_lufs, f\"LUFS增强频谱图 (目标响度: -16 LUFS)\", ax=plt.gca())\n",
236
+ "plt.tight_layout()\n",
237
+ "plt.show()\n",
238
+ "\n",
239
+ "# 响度对比\n",
240
+ "plt.figure(figsize=(10, 6))\n",
241
+ "labels = ['原始音频', 'LUFS增强']\n",
242
+ "peak_values = [original_loudness['peak_dbfs'], lufs_loudness['peak_dbfs']]\n",
243
+ "rms_values = [original_loudness['rms_dbfs'], lufs_loudness['rms_dbfs']]\n",
244
+ "lufs_values = [original_loudness['lufs'], lufs_loudness['lufs']]\n",
245
+ "\n",
246
+ "x = np.arange(len(labels))\n",
247
+ "width = 0.25\n",
248
+ "\n",
249
+ "plt.bar(x - width, peak_values, width, label='峰值 (dBFS)')\n",
250
+ "plt.bar(x, rms_values, width, label='RMS (dBFS)')\n",
251
+ "plt.bar(x + width, lufs_values, width, label='LUFS')\n",
252
+ "\n",
253
+ "plt.ylabel('分贝')\n",
254
+ "plt.title('LUFS音量增强前后响度对比')\n",
255
+ "plt.xticks(x, labels)\n",
256
+ "plt.legend()\n",
257
+ "plt.grid(True, alpha=0.3)\n",
258
+ "plt.tight_layout()\n",
259
+ "plt.show()\n",
260
+ "\n",
261
+ "print(f\"LUFS增强后响度信息:\")\n",
262
+ "print(f\"峰值: {lufs_loudness['peak_dbfs']:.2f} dBFS (变化: {lufs_loudness['peak_dbfs'] - original_loudness['peak_dbfs']:.2f} dB)\")\n",
263
+ "print(f\"RMS: {lufs_loudness['rms_dbfs']:.2f} dBFS (变化: {lufs_loudness['rms_dbfs'] - original_loudness['rms_dbfs']:.2f} dB)\")\n",
264
+ "print(f\"LUFS: {lufs_loudness['lufs']:.2f} LUFS (变化: {lufs_loudness['lufs'] - original_loudness['lufs']:.2f} dB)\")\n",
265
+ "print(f\"峰均比: {lufs_loudness['crest_factor_db']:.2f} dB\")"
266
+ ]
267
+ },
268
+ {
269
+ "cell_type": "code",
270
+ "execution_count": null,
271
+ "id": "k6vath03xyg",
272
+ "metadata": {},
273
+ "outputs": [],
274
+ "source": [
275
+ "# 创建三种增强方法的综合对比图\n",
276
+ "plt.figure(figsize=(16, 12))\n",
277
+ "\n",
278
+ "# 波形对比\n",
279
+ "plt.subplot(3, 1, 1)\n",
280
+ "time = np.linspace(0, len(wav) / sr, len(wav))\n",
281
+ "plt.plot(time, wav, label='原始波形', alpha=0.7)\n",
282
+ "plt.plot(time, wav_linear, label='Linear增强', alpha=0.7)\n",
283
+ "plt.plot(time, wav_dbrms, label='dBrms增强', alpha=0.7)\n",
284
+ "plt.plot(time, wav_lufs, label='LUFS增强', alpha=0.7)\n",
285
+ "plt.title('三种音量增强方法波形对比')\n",
286
+ "plt.xlabel('时间 (s)')\n",
287
+ "plt.ylabel('幅度')\n",
288
+ "plt.grid(True, alpha=0.3)\n",
289
+ "plt.legend()\n",
290
+ "\n",
291
+ "# 响度对比 - 条形图\n",
292
+ "plt.subplot(3, 1, 2)\n",
293
+ "labels = ['原始音频', 'Linear增强', 'dBrms增强', 'LUFS增强']\n",
294
+ "peak_values = [original_loudness['peak_dbfs'], linear_loudness['peak_dbfs'], \n",
295
+ " dbrms_loudness['peak_dbfs'], lufs_loudness['peak_dbfs']]\n",
296
+ "rms_values = [original_loudness['rms_dbfs'], linear_loudness['rms_dbfs'], \n",
297
+ " dbrms_loudness['rms_dbfs'], lufs_loudness['rms_dbfs']]\n",
298
+ "lufs_values = [original_loudness['lufs'], linear_loudness['lufs'], \n",
299
+ " dbrms_loudness['lufs'], lufs_loudness['lufs']]\n",
300
+ "\n",
301
+ "x = np.arange(len(labels))\n",
302
+ "width = 0.25\n",
303
+ "\n",
304
+ "plt.bar(x - width, peak_values, width, label='峰值 (dBFS)')\n",
305
+ "plt.bar(x, rms_values, width, label='RMS (dBFS)')\n",
306
+ "plt.bar(x + width, lufs_values, width, label='LUFS')\n",
307
+ "\n",
308
+ "plt.ylabel('分贝')\n",
309
+ "plt.title('三种音量增强方法响度对比')\n",
310
+ "plt.xticks(x, labels)\n",
311
+ "plt.legend()\n",
312
+ "plt.grid(True, alpha=0.3)\n",
313
+ "\n",
314
+ "# 峰均比对比\n",
315
+ "plt.subplot(3, 1, 3)\n",
316
+ "crest_values = [original_loudness['crest_factor_db'], linear_loudness['crest_factor_db'],\n",
317
+ " dbrms_loudness['crest_factor_db'], lufs_loudness['crest_factor_db']]\n",
318
+ "\n",
319
+ "plt.bar(x, crest_values, width=0.5, color='purple', alpha=0.7)\n",
320
+ "plt.ylabel('分贝')\n",
321
+ "plt.title('三种音量增强方法峰均比对比')\n",
322
+ "plt.xticks(x, labels)\n",
323
+ "plt.grid(True, alpha=0.3)\n",
324
+ "\n",
325
+ "plt.tight_layout()\n",
326
+ "plt.show()\n",
327
+ "\n",
328
+ "# 创建综合对比表格\n",
329
+ "print(\"\\n三种音量增强方法综合对比:\")\n",
330
+ "print(\"=\" * 80)\n",
331
+ "print(f\"{'方法':<12}{'峰值 (dBFS)':<20}{'RMS (dBFS)':<20}{'LUFS':<20}{'峰均比 (dB)':<15}\")\n",
332
+ "print(\"-\" * 80)\n",
333
+ "print(f\"{'原始音频':<12}{original_loudness['peak_dbfs']:<20.2f}{original_loudness['rms_dbfs']:<20.2f}{original_loudness['lufs']:<20.2f}{original_loudness['crest_factor_db']:<15.2f}\")\n",
334
+ "print(f\"{'Linear增强':<12}{linear_loudness['peak_dbfs']:<20.2f}{linear_loudness['rms_dbfs']:<20.2f}{linear_loudness['lufs']:<20.2f}{linear_loudness['crest_factor_db']:<15.2f}\")\n",
335
+ "print(f\"{'dBrms增强':<12}{dbrms_loudness['peak_dbfs']:<20.2f}{dbrms_loudness['rms_dbfs']:<20.2f}{dbrms_loudness['lufs']:<20.2f}{dbrms_loudness['crest_factor_db']:<15.2f}\")\n",
336
+ "print(f\"{'LUFS增强':<12}{lufs_loudness['peak_dbfs']:<20.2f}{lufs_loudness['rms_dbfs']:<20.2f}{lufs_loudness['lufs']:<20.2f}{lufs_loudness['crest_factor_db']:<15.2f}\")\n",
337
+ "print(\"=\" * 80)\n",
338
+ "\n",
339
+ "# 各方法响度变化量\n",
340
+ "print(\"\\n响度变化量 (相对于原始音频):\")\n",
341
+ "print(\"=\" * 80)\n",
342
+ "print(f\"{'方法':<12}{'峰值变化 (dB)':<20}{'RMS变化 (dB)':<20}{'LUFS变化 (dB)':<20}\")\n",
343
+ "print(\"-\" * 80)\n",
344
+ "print(f\"{'Linear增强':<12}{linear_loudness['peak_dbfs'] - original_loudness['peak_dbfs']:<20.2f}{linear_loudness['rms_dbfs'] - original_loudness['rms_dbfs']:<20.2f}{linear_loudness['lufs'] - original_loudness['lufs']:<20.2f}\")\n",
345
+ "print(f\"{'dBrms增强':<12}{dbrms_loudness['peak_dbfs'] - original_loudness['peak_dbfs']:<20.2f}{dbrms_loudness['rms_dbfs'] - original_loudness['rms_dbfs']:<20.2f}{dbrms_loudness['lufs'] - original_loudness['lufs']:<20.2f}\")\n",
346
+ "print(f\"{'LUFS增强':<12}{lufs_loudness['peak_dbfs'] - original_loudness['peak_dbfs']:<20.2f}{lufs_loudness['rms_dbfs'] - original_loudness['rms_dbfs']:<20.2f}{lufs_loudness['lufs'] - original_loudness['lufs']:<20.2f}\")\n",
347
+ "print(\"=\" * 80)\n",
348
+ "\n",
349
+ "# 总结分析\n",
350
+ "print(\"\\n音量增强方法分析总结:\")\n",
351
+ "print(\"=\" * 80)\n",
352
+ "print(\"1. Linear增强 (volume_aug_linmax):\")\n",
353
+ "print(\" - 基于峰值的线性增益,目标为最大峰值为指定的线性值\")\n",
354
+ "print(\" - 特点:保持动态范围不变,整体增强或减弱\")\n",
355
+ "print(\" - 适用场景:需要精确控制峰值而不改变音频动态特性的场合\")\n",
356
+ "print(\"\\n2. dBrms增强 (volume_aug_dbrms):\")\n",
357
+ "print(\" - 基于RMS电平的增益,目标为指定的dB电平\")\n",
358
+ "print(\" - 特点:以能量均值为基准,更接近人耳感知\")\n",
359
+ "print(\" - 适用场景:需要统一音频能量电平的场合\")\n",
360
+ "print(\"\\n3. LUFS增强 (volume_aug_lufs):\")\n",
361
+ "print(\" - 基于国际响度标准的增益,符合广播标准\")\n",
362
+ "print(\" - 特点:考虑人耳频率加权,最接近人耳响度感知\")\n",
363
+ "print(\" - 适用场景:广播、流媒体、专业音频处理等需要符合响度标准的场合\")\n",
364
+ "print(\"=\" * 80)"
365
+ ]
366
+ },
367
+ {
368
+ "cell_type": "markdown",
369
+ "id": "2fb931jxgtr",
370
+ "metadata": {},
371
+ "source": [
372
+ "# 音量增强方法综合分析\n",
373
+ "\n",
374
+ "本笔记本演示并分析了neverlib库中三种音量增强方法的特性和效果:\n",
375
+ "\n",
376
+ "1. **Linear增强 (volume_aug_linmax)**\n",
377
+ " - 基于峰值的线性增益\n",
378
+ " - 将音频最大峰值调整为目标线性值(0-1范围)\n",
379
+ " - 保持动态范围不变\n",
380
+ "\n",
381
+ "2. **dBrms增强 (volume_aug_dbrms)**\n",
382
+ " - 基于均方根(RMS)功率的增益\n",
383
+ " - 将音频RMS电平调整为目标分贝值\n",
384
+ " - 更接近能量感知\n",
385
+ "\n",
386
+ "3. **LUFS增强 (volume_aug_lufs)**\n",
387
+ " - 基于感知响度单位(LUFS)的增益\n",
388
+ " - 符合广播标准的响度归一化\n",
389
+ " - 考虑人耳感知加权,最接近人耳响度感知\n",
390
+ "\n",
391
+ "各方法适用于不同场景,可根据需求选择合适的音量增强方式。"
392
+ ]
393
+ }
394
+ ],
395
+ "metadata": {
396
+ "kernelspec": {
397
+ "display_name": "py311torch211",
398
+ "language": "python",
399
+ "name": "python3"
400
+ },
401
+ "language_info": {
402
+ "codemirror_mode": {
403
+ "name": "ipython",
404
+ "version": 3
405
+ },
406
+ "file_extension": ".py",
407
+ "mimetype": "text/x-python",
408
+ "name": "python",
409
+ "nbconvert_exporter": "python",
410
+ "pygments_lexer": "ipython3",
411
+ "version": "3.11.9"
412
+ }
413
+ },
414
+ "nbformat": 4,
415
+ "nbformat_minor": 5
416
+ }