neverlib 0.2.4__py3-none-any.whl → 0.2.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- neverlib/.history/Docs/audio_aug/del_20250827162530.py +0 -0
- neverlib/.history/Docs/audio_aug/del_20250827162540.py +2 -0
- neverlib/.history/Docs/audio_aug/del_20250827162541.py +7 -0
- neverlib/.history/Docs/audio_aug/del_20250827162606.py +7 -0
- neverlib/.history/Docs/audio_aug/del_20250827162637.py +8 -0
- neverlib/.history/Docs/audio_aug/del_20250827162645.py +8 -0
- neverlib/.history/Docs/audio_aug/del_20250827162723.py +9 -0
- neverlib/.history/Docs/audio_aug/del_20250827162739.py +9 -0
- neverlib/.history/Docs/audio_aug/test_snr_20250827161751.py +55 -0
- neverlib/.history/Docs/audio_aug/test_snr_20250827161754.py +55 -0
- neverlib/.history/Docs/audio_aug/test_snr_20250827161833.py +54 -0
- neverlib/.history/Docs/audio_aug/test_snr_20250827162017.py +56 -0
- neverlib/.history/Docs/audio_aug/test_snr_20250827162021.py +57 -0
- neverlib/.history/Docs/audio_aug/test_snr_20250827162028.py +57 -0
- neverlib/.history/Docs/audio_aug_test/del_20250827162738.py +9 -0
- neverlib/.history/Docs/audio_aug_test/del_20250827162819.py +9 -0
- neverlib/.history/Docs/audio_aug_test/del_20250827162830.py +9 -0
- neverlib/.history/Docs/audio_aug_test/del_20250827162846.py +9 -0
- neverlib/.history/Docs/audio_aug_test/del_20250827162851.py +9 -0
- neverlib/.history/Docs/audio_aug_test/del_20250827162903.py +10 -0
- neverlib/.history/Docs/audio_aug_test/del_20250827162921.py +10 -0
- neverlib/.history/Docs/audio_aug_test/del_20250827162926.py +10 -0
- neverlib/.history/Docs/audio_aug_test/del_20250827163030.py +10 -0
- neverlib/.history/Docs/audio_aug_test/del_20250827163032.py +10 -0
- neverlib/.history/QA/html2markdown_20250822234112.md +0 -0
- neverlib/.history/QA/html2markdown_20250822234140.py +9 -0
- neverlib/.history/QA/html2markdown_20250822234141.md +9 -0
- neverlib/.history/QA/html2markdown_20250822234159.py +12 -0
- neverlib/.history/QA/html2markdown_20250822234200.py +17 -0
- neverlib/.history/QA/html2markdown_20250822234236.py +17 -0
- neverlib/.history/QA/html2markdown_20250822234340.py +14 -0
- neverlib/.history/QA/html2markdown_20250822234522.py +18 -0
- neverlib/.history/QA/html2markdown_20250822234601.py +20 -0
- neverlib/.history/QA/html2markdown_20250822234615.py +22 -0
- neverlib/.history/QA/html2markdown_20250822234715.py +28 -0
- neverlib/.history/QA/html2markdown_20250822234720.py +27 -0
- neverlib/.history/QA/html2markdown_20250822234903.py +27 -0
- neverlib/.history/__init___20250805234212.py +41 -0
- neverlib/.history/__init___20250904102635.py +39 -0
- neverlib/.history/__init___20250904102836.py +34 -0
- neverlib/.history/__init___20250904102838.py +39 -0
- neverlib/.history/__init___20250904102851.py +33 -0
- neverlib/.history/audio_aug/audio_aug_20250826155913.py +158 -0
- neverlib/.history/audio_aug/audio_aug_20250826164159.py +159 -0
- neverlib/.history/audio_aug/audio_aug_20250826164217.py +160 -0
- neverlib/.history/audio_aug/audio_aug_20250826164408.py +161 -0
- neverlib/.history/audio_aug/audio_aug_20250826164423.py +161 -0
- neverlib/.history/audio_aug/audio_aug_20250826164529.py +161 -0
- neverlib/.history/audio_aug/audio_aug_20250826164824.py +161 -0
- neverlib/.history/audio_aug/audio_aug_20250826164932.py +162 -0
- neverlib/.history/audio_aug/audio_aug_20250826164947.py +162 -0
- neverlib/.history/audio_aug/audio_aug_20250826165403.py +162 -0
- neverlib/.history/audio_aug/audio_aug_20250826165421.py +162 -0
- neverlib/.history/audio_aug/audio_aug_20250826165509.py +163 -0
- neverlib/.history/audio_aug/audio_aug_20250826165702.py +163 -0
- neverlib/.history/audio_aug/audio_aug_20250826165732.py +165 -0
- neverlib/.history/audio_aug/audio_aug_20250826170041.py +163 -0
- neverlib/.history/audio_aug/audio_aug_20250826170105.py +164 -0
- neverlib/.history/audio_aug/audio_aug_20250826170154.py +164 -0
- neverlib/.history/audio_aug/audio_aug_20250826170220.py +165 -0
- neverlib/.history/audio_aug/audio_aug_20250826170221.py +165 -0
- neverlib/.history/audio_aug/audio_aug_20250826170228.py +165 -0
- neverlib/.history/audio_aug/audio_aug_20250826170231.py +165 -0
- neverlib/.history/audio_aug/audio_aug_20250826212001.py +165 -0
- neverlib/.history/audio_aug/audio_aug_20250826220038.py +165 -0
- neverlib/.history/audio_aug/audio_aug_20250826220133.py +165 -0
- neverlib/.history/audio_aug/audio_aug_20250826220148.py +165 -0
- neverlib/.history/audio_aug/audio_aug_20250826220154.py +165 -0
- neverlib/.history/audio_aug/audio_aug_20250826220156.py +165 -0
- neverlib/.history/audio_aug/audio_aug_20250826220314.py +165 -0
- neverlib/.history/audio_aug/audio_aug_20250826220343.py +184 -0
- neverlib/.history/audio_aug/audio_aug_20250826220345.py +184 -0
- neverlib/.history/audio_aug/audio_aug_20250826220349.py +184 -0
- neverlib/.history/audio_aug/audio_aug_20250826220429.py +184 -0
- neverlib/.history/audio_aug/audio_aug_20250826220447.py +184 -0
- neverlib/.history/audio_aug/audio_aug_20250826220601.py +186 -0
- neverlib/.history/audio_aug/audio_aug_20250826220638.py +186 -0
- neverlib/.history/audio_aug/audio_aug_20250826220641.py +186 -0
- neverlib/.history/audio_aug/audio_aug_20250826220647.py +186 -0
- neverlib/.history/audio_aug/audio_aug_20250826220653.py +186 -0
- neverlib/.history/audio_aug/audio_aug_20250826220655.py +186 -0
- neverlib/.history/audio_aug/audio_aug_20250826220731.py +185 -0
- neverlib/.history/audio_aug/audio_aug_20250826220739.py +185 -0
- neverlib/.history/audio_aug/audio_aug_20250826220747.py +185 -0
- neverlib/.history/audio_aug/audio_aug_20250826220801.py +186 -0
- neverlib/.history/audio_aug/audio_aug_20250826220822.py +186 -0
- neverlib/.history/audio_aug/audio_aug_20250826220901.py +186 -0
- neverlib/.history/audio_aug/audio_aug_20250826221107.py +187 -0
- neverlib/.history/audio_aug/audio_aug_20250826221310.py +188 -0
- neverlib/.history/audio_aug/audio_aug_20250826221353.py +191 -0
- neverlib/.history/audio_aug/audio_aug_20250826221821.py +191 -0
- neverlib/.history/audio_aug/audio_aug_20250826221838.py +191 -0
- neverlib/.history/audio_aug/audio_aug_20250826221906.py +191 -0
- neverlib/.history/audio_aug/audio_aug_20250826221930.py +191 -0
- neverlib/.history/audio_aug/audio_aug_20250826221939.py +191 -0
- neverlib/.history/audio_aug/audio_aug_20250826221955.py +191 -0
- neverlib/.history/audio_aug/audio_aug_20250826222008.py +197 -0
- neverlib/.history/audio_aug/audio_aug_20250826222017.py +200 -0
- neverlib/.history/audio_aug/audio_aug_20250826222046.py +203 -0
- neverlib/.history/audio_aug/audio_aug_20250826222105.py +203 -0
- neverlib/.history/audio_aug/audio_aug_20250826222206.py +203 -0
- neverlib/.history/audio_aug/audio_aug_20250826222302.py +203 -0
- neverlib/.history/audio_aug/audio_aug_20250826222336.py +203 -0
- neverlib/.history/audio_aug/audio_aug_20250826222455.py +204 -0
- neverlib/.history/audio_aug/audio_aug_20250826222526.py +204 -0
- neverlib/.history/audio_aug/audio_aug_20250826222541.py +204 -0
- neverlib/.history/audio_aug/audio_aug_20250826222624.py +202 -0
- neverlib/.history/audio_aug/audio_aug_20250826222714.py +205 -0
- neverlib/.history/audio_aug/audio_aug_20250826222820.py +205 -0
- neverlib/.history/audio_aug/audio_aug_20250826222827.py +205 -0
- neverlib/.history/audio_aug/audio_aug_20250826222927.py +232 -0
- neverlib/.history/audio_aug/audio_aug_20250826223009.py +232 -0
- neverlib/.history/audio_aug/audio_aug_20250826223054.py +232 -0
- neverlib/.history/audio_aug/audio_aug_20250826223225.py +233 -0
- neverlib/.history/audio_aug/audio_aug_20250826223344.py +236 -0
- neverlib/.history/audio_aug/audio_aug_20250826223356.py +236 -0
- neverlib/.history/audio_aug/audio_aug_20250826223955.py +242 -0
- neverlib/.history/audio_aug/audio_aug_20250826224210.py +240 -0
- neverlib/.history/audio_aug/audio_aug_20250826224250.py +242 -0
- neverlib/.history/audio_aug/audio_aug_20250826224323.py +280 -0
- neverlib/.history/audio_aug/audio_aug_20250826224452.py +263 -0
- neverlib/.history/audio_aug/audio_aug_20250826224455.py +263 -0
- neverlib/.history/audio_aug/audio_aug_20250826224502.py +263 -0
- neverlib/.history/audio_aug/audio_aug_20250826224528.py +263 -0
- neverlib/.history/audio_aug/audio_aug_20250826224658.py +263 -0
- neverlib/.history/audio_aug/audio_aug_20250826224833.py +264 -0
- neverlib/.history/audio_aug/audio_aug_20250826225013.py +269 -0
- neverlib/.history/audio_aug/audio_aug_20250826225050.py +269 -0
- neverlib/.history/audio_aug/audio_aug_20250826225241.py +268 -0
- neverlib/.history/audio_aug/audio_aug_20250826225315.py +266 -0
- neverlib/.history/audio_aug/audio_aug_20250826225404.py +266 -0
- neverlib/.history/audio_aug/audio_aug_20250826225502.py +265 -0
- neverlib/.history/audio_aug/audio_aug_20250826225950.py +267 -0
- neverlib/.history/audio_aug/audio_aug_20250826225959.py +268 -0
- neverlib/.history/audio_aug/audio_aug_20250826230222.py +271 -0
- neverlib/.history/audio_aug/audio_aug_20250826230248.py +270 -0
- neverlib/.history/audio_aug/audio_aug_20250826230638.py +266 -0
- neverlib/.history/audio_aug/audio_aug_20250826230755.py +266 -0
- neverlib/.history/audio_aug/audio_aug_20250826230941.py +265 -0
- neverlib/.history/audio_aug/audio_aug_20250826231054.py +266 -0
- neverlib/.history/audio_aug/audio_aug_20250826231117.py +266 -0
- neverlib/.history/audio_aug/audio_aug_20250826231219.py +266 -0
- neverlib/.history/audio_aug/audio_aug_20250826232330.py +266 -0
- neverlib/.history/audio_aug/audio_aug_20250826232352.py +266 -0
- neverlib/.history/audio_aug/audio_aug_20250827152748.py +268 -0
- neverlib/.history/audio_aug/audio_aug_20250827152806.py +268 -0
- neverlib/.history/audio_aug/audio_aug_20250827152808.py +268 -0
- neverlib/.history/audio_aug/audio_aug_20250827152917.py +283 -0
- neverlib/.history/audio_aug/audio_aug_20250827152929.py +281 -0
- neverlib/.history/audio_aug/audio_aug_20250827153100.py +286 -0
- neverlib/.history/audio_aug/audio_aug_20250827153102.py +286 -0
- neverlib/.history/audio_aug/audio_aug_20250827153301.py +295 -0
- neverlib/.history/audio_aug/audio_aug_20250827153331.py +298 -0
- neverlib/.history/audio_aug/audio_aug_20250827153525.py +303 -0
- neverlib/.history/audio_aug/audio_aug_20250827153533.py +304 -0
- neverlib/.history/audio_aug/audio_aug_20250827153541.py +321 -0
- neverlib/.history/audio_aug/audio_aug_20250827153805.py +322 -0
- neverlib/.history/audio_aug/audio_aug_20250827153832.py +323 -0
- neverlib/.history/audio_aug/audio_aug_20250827153836.py +324 -0
- neverlib/.history/audio_aug/audio_aug_20250827153846.py +324 -0
- neverlib/.history/audio_aug/audio_aug_20250827153859.py +325 -0
- neverlib/.history/audio_aug/audio_aug_20250827154453.py +337 -0
- neverlib/.history/audio_aug/audio_aug_20250827154513.py +355 -0
- neverlib/.history/audio_aug/audio_aug_20250827154538.py +356 -0
- neverlib/.history/audio_aug/audio_aug_20250827154541.py +357 -0
- neverlib/.history/audio_aug/audio_aug_20250827154612.py +357 -0
- neverlib/.history/audio_aug/audio_aug_20250827154657.py +360 -0
- neverlib/.history/audio_aug/audio_aug_20250827154708.py +360 -0
- neverlib/.history/audio_aug/audio_aug_20250827154728.py +366 -0
- neverlib/.history/audio_aug/audio_aug_20250827154755.py +367 -0
- neverlib/.history/audio_aug/audio_aug_20250827154800.py +367 -0
- neverlib/.history/audio_aug/audio_aug_20250827154917.py +368 -0
- neverlib/.history/audio_aug/audio_aug_20250827154928.py +369 -0
- neverlib/.history/audio_aug/audio_aug_20250827154932.py +370 -0
- neverlib/.history/audio_aug/audio_aug_20250827154947.py +372 -0
- neverlib/.history/audio_aug/audio_aug_20250827155015.py +375 -0
- neverlib/.history/audio_aug/audio_aug_20250827155106.py +375 -0
- neverlib/.history/audio_aug/audio_aug_20250827155114.py +393 -0
- neverlib/.history/audio_aug/audio_aug_20250827155207.py +415 -0
- neverlib/.history/audio_aug/audio_aug_20250827155300.py +415 -0
- neverlib/.history/audio_aug/audio_aug_20250827155321.py +471 -0
- neverlib/.history/audio_aug/audio_aug_20250827164703.py +471 -0
- neverlib/.history/audio_aug/audio_aug_20250827164749.py +471 -0
- neverlib/.history/audio_aug/audio_aug_20250827165252.py +472 -0
- neverlib/.history/audio_aug/audio_aug_20250827165334.py +472 -0
- neverlib/.history/audio_aug/audio_aug_20250827165404.py +473 -0
- neverlib/.history/audio_aug/audio_aug_20250827165610.py +473 -0
- neverlib/.history/audio_aug/audio_aug_20250827165805.py +473 -0
- neverlib/.history/audio_aug/audio_aug_20250827170056.py +473 -0
- neverlib/.history/audio_aug/audio_aug_20250827170106.py +472 -0
- neverlib/.history/audio_aug/audio_aug_20250827170143.py +472 -0
- neverlib/.history/audio_aug/audio_aug_20250827170216.py +472 -0
- neverlib/.history/audio_aug/audio_aug_20250827170218.py +472 -0
- neverlib/.history/audio_aug/audio_aug_20250827170314.py +472 -0
- neverlib/.history/audio_aug/audio_aug_20250827171500.py +471 -0
- neverlib/.history/audio_aug/audio_aug_20250827172347.py +471 -0
- neverlib/.history/audio_aug/audio_aug_20250827172558.py +470 -0
- neverlib/.history/audio_aug/audio_aug_20250827172559.py +470 -0
- neverlib/.history/audio_aug/audio_aug_20250827172801.py +470 -0
- neverlib/.history/audio_aug/audio_aug_20250827182522.py +470 -0
- neverlib/.history/audio_aug/audio_aug_20250827182526.py +470 -0
- neverlib/.history/audio_aug/audio_aug_20250827182626.py +470 -0
- neverlib/.history/audio_aug/audio_aug_20250827182715.py +470 -0
- neverlib/.history/audio_aug/audio_aug_20250904185444.py +470 -0
- neverlib/.history/audio_aug/audio_aug_20250904185538.py +445 -0
- neverlib/.history/data_analyze/__init___20250806204158.py +14 -0
- neverlib/.history/data_analyze/__init___20250827163248.py +14 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250821143140.py +76 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250821153208.py +76 -0
- neverlib/.history/filter/auto_eq/freq_eq_20250821153214.py +76 -0
- neverlib/.history/filter/auto_eq/ga_eq_basic_20250901110521.py +385 -0
- neverlib/.history/filter/auto_eq/ga_eq_basic_20250901110652.py +385 -0
- neverlib/.history/filter/common_20250806002134.py +37 -0
- neverlib/.history/filter/common_20250821120448.py +49 -0
- neverlib/.history/filter/common_20250821120453.py +49 -0
- neverlib/.history/metrics/snr_20250827224201.py +182 -0
- neverlib/.history/metrics/snr_20250827234019.py +186 -0
- neverlib/.history/metrics/snr_20250827234028.py +186 -0
- neverlib/.history/metrics/snr_20250827234030.py +186 -0
- neverlib/.history/utils/audio_split_20250805234209.py +268 -0
- neverlib/.history/utils/audio_split_20250904185309.py +268 -0
- neverlib/.history/utils/utils_20250813165516.py +330 -0
- neverlib/.history/utils/utils_20250904181341.py +328 -0
- neverlib/.history/utils/utils_20250904185546.py +352 -0
- neverlib/.history/utils/utils_20250904185548.py +353 -0
- neverlib/.history/utils/utils_20250904185603.py +353 -0
- neverlib/.history/utils/utils_20250904185636.py +353 -0
- neverlib/.history/utils/utils_20250904185658.py +358 -0
- neverlib/.history/utils/utils_20250904190053.py +359 -0
- neverlib/.specstory/history/2025-08-22_02-10Z-/345/256/214/345/226/204/345/207/275/346/225/260/347/232/204/345/212/237/350/203/275/345/222/214/345/217/230/351/207/217/345/220/215/345/273/272/350/256/256.md +247 -0
- neverlib/.specstory/history/2025-08-26_11-54Z-oserror-missing-shared-object-file.md +87 -0
- neverlib/.specstory/history/2025-08-27_08-07Z-/345/256/214/345/226/204/346/265/213/350/257/225/346/226/207/346/241/243/347/232/204/350/256/250/350/256/272.md +296 -0
- neverlib/.specstory/history/2025-08-27_08-29Z-delete-python-file-command.md +211 -0
- neverlib/.specstory/history/2025-08-27_09-05Z-/345/234/250jupyter/344/270/255/346/222/255/346/224/276/351/237/263/351/242/221/347/232/204/344/273/243/347/240/201/344/277/256/346/224/271.md +357 -0
- neverlib/Docs/audio_aug_test/test_snr.py +55 -0
- neverlib/Docs/audio_aug_test/test_volume.py +0 -0
- neverlib/QA/html2markdown.py +27 -0
- neverlib/__init__.py +10 -20
- neverlib/audio_aug/__init__.py +6 -1
- neverlib/audio_aug/audio_aug.py +360 -55
- neverlib/data_analyze/__init__.py +8 -2
- neverlib/data_analyze/temporal_features.py +1 -1
- neverlib/filter/__init__.py +9 -3
- neverlib/filter/auto_eq/freq_eq.py +1 -1
- neverlib/filter/auto_eq/ga_eq_basic.py +3 -3
- neverlib/filter/common.py +12 -0
- neverlib/metrics/snr.py +5 -3
- neverlib/utils/__init__.py +14 -7
- neverlib/utils/lazy_module.py +81 -0
- neverlib/utils/message.py +3 -8
- neverlib/utils/utils.py +32 -3
- neverlib/vad/__init__.py +16 -9
- neverlib/vad/utils.py +20 -6
- {neverlib-0.2.4.dist-info → neverlib-0.2.6.dist-info}/METADATA +21 -17
- neverlib-0.2.6.dist-info/RECORD +467 -0
- neverlib-0.2.4.dist-info/RECORD +0 -229
- /neverlib/{Docs/audio_aug/test_snr.py → .history/Docs/audio_aug/test_snr_20250827162033.py} +0 -0
- {neverlib-0.2.4.dist-info → neverlib-0.2.6.dist-info}/WHEEL +0 -0
- {neverlib-0.2.4.dist-info → neverlib-0.2.6.dist-info}/licenses/LICENSE +0 -0
- {neverlib-0.2.4.dist-info → neverlib-0.2.6.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
import librosa
|
|
2
|
+
import numpy as np
|
|
3
|
+
from neverlib.vad.utils import vad2nad
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def get_snr(speech, noise):
    """Compute the signal-to-noise ratio (SNR) in dB.

    Args:
        speech: Speech signal, a numpy array of samples.
        noise: Noise signal, a numpy array with the same number of
            dimensions as ``speech``.

    Returns:
        snr: ``10 * log10(P_speech / P_noise)``, where each power is the mean
            of the squared samples and the noise power is floored at 1e-10.

    Raises:
        AssertionError: If ``speech`` and ``noise`` differ in ``ndim``.
    """
    assert speech.ndim == noise.ndim, "speech和noise的维度不一样"

    # Compute powers in float64: squaring integer PCM (e.g. int16) in its own
    # dtype silently wraps around and corrupts the power estimate.
    power_speech = np.mean(np.square(np.asarray(speech, dtype=np.float64)))
    power_noise = np.mean(np.square(np.asarray(noise, dtype=np.float64)))
    power_noise = max(power_noise, 1e-10)  # floor to avoid division by zero

    return 10 * np.log10(power_speech / power_noise)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def get_snr_from_noisy(noisy, speech_vad=None):
    """Estimate the SNR (dB) of a noisy recording from its VAD segmentation.

    Samples inside the VAD segments are treated as speech plus noise; samples
    in the regions returned by ``vad2nad`` are treated as noise only.  The
    clean-speech power is estimated as the difference of the two powers.

    Args:
        noisy: Noisy audio, sliced along its first axis by sample index.
        speech_vad: Speech segments as ``[{'start': int, 'end': int}, ...]``.

    Returns:
        snr: Estimated SNR in dB.

    Raises:
        AssertionError: If ``speech_vad`` is None.
    """
    assert speech_vad is not None, "speech_vad不能为空"

    # The original code referenced a module-level ``EPS`` that is never
    # defined or imported, so every call raised NameError.  Use a local floor
    # matching the 1e-10 floor used by get_snr.
    eps = 1e-10

    # Gather the speech (speech + noise) samples.
    speech = np.concatenate(
        [noisy[segment['start']:segment['end']] for segment in speech_vad],
        axis=0,
    )

    # Gather the non-speech samples.
    # NOTE(review): vad2nad presumably returns the complement of the VAD
    # segments within [0, len(noisy)) -- confirm against neverlib.vad.utils.
    noise_point_list = vad2nad(speech_vad, len(noisy))
    noise = np.concatenate(
        [noisy[point['start']:point['end']] for point in noise_point_list],
        axis=0,
    )

    # Work in float64 so that integer PCM does not overflow when squared.
    P_speech_noise = np.mean(np.square(np.asarray(speech, dtype=np.float64)))  # speech + noise power
    P_noise = max(np.mean(np.square(np.asarray(noise, dtype=np.float64))), eps)  # noise-only power

    # Net speech power, floored so the logarithm stays finite.
    P_speech = max(P_speech_noise - P_noise, eps)

    return 10 * np.log10(P_speech / P_noise)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def seg_snr(clean, noisy, frame_length: int, hop_length: int):
    """Compute the segmental SNR: the mean of the per-frame SNRs in dB.

    Args:
        clean: Clean audio, numpy array.
        noisy: Noisy audio, numpy array with the same shape as ``clean``.
        frame_length: Frame length in samples.
        hop_length: Hop between consecutive frames in samples.

    Returns:
        snr_mean: Mean SNR over all non-silent frames, or ``-inf`` when every
            frame is skipped as silent.

    Raises:
        AssertionError: If ``clean`` and ``noisy`` differ in shape.
    """
    assert clean.shape == noisy.shape, "clean和noisy的维度不一样"

    # Float64 avoids integer wrap-around in the subtraction and squaring below.
    clean = np.asarray(clean, dtype=np.float64)
    noisy = np.asarray(noisy, dtype=np.float64)

    # Split both signals into frames.
    clean_frames = librosa.util.frame(clean, frame_length=frame_length, hop_length=hop_length)  # (frame_length, n_frames)
    noisy_frames = librosa.util.frame(noisy, frame_length=frame_length, hop_length=hop_length)  # (frame_length, n_frames)

    snr_frames = []
    for clean_frame, noisy_frame in zip(clean_frames.T, noisy_frames.T):
        # Skip frames where either signal is silent.
        if np.all(np.abs(clean_frame) < 1e-6) or np.all(np.abs(noisy_frame) < 1e-6):
            continue
        # The noise is the difference between the noisy and the clean frame.
        # Passing the noisy frame itself as "noise" (as before) yields roughly
        # 0 dB regardless of the actual noise level.
        snr_frames.append(get_snr(clean_frame, noisy_frame - clean_frame))

    # Every frame was silent.
    if not snr_frames:
        return float('-inf')

    return np.mean(snr_frames)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def psnr(clean, noisy, max_val=None):
    """Compute the peak signal-to-noise ratio (PSNR) in dB.

    Args:
        clean: Clean audio, numpy array.
        noisy: Noisy audio, numpy array with the same shape as ``clean``.
        max_val: Peak signal value.  When None, the largest absolute value of
            ``clean`` is used.

    Returns:
        psnr: ``10 * log10(max_val**2 / MSE)`` in dB, or ``inf`` when the two
            signals are identical (MSE of zero).

    Raises:
        AssertionError: If ``clean`` and ``noisy`` differ in shape.
    """
    assert clean.shape == noisy.shape, "clean和noisy的维度不一样"

    # Work in float64: with integer PCM (e.g. int16) both the squared error
    # and max_val**2 would wrap around in the integer dtype.
    clean = np.asarray(clean, dtype=np.float64)
    noisy = np.asarray(noisy, dtype=np.float64)

    # Default the peak to the actual maximum magnitude of the clean signal.
    if max_val is None:
        max_val = np.abs(clean).max()
    peak = float(max_val)

    # Mean squared error between the two signals.
    mse = np.mean((clean - noisy) ** 2)

    # Identical signals: avoid dividing by zero.
    if mse == 0:
        return float('inf')

    return 10 * np.log10(peak ** 2 / mse)
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def si_sdr(reference, estimate, epsilon=1e-8):
    """Compute the scale-invariant signal-to-distortion ratio (SI-SDR) in dB.

    Both signals are mean-centred, the estimate is projected onto the
    reference to obtain the target component, and the ratio of the target
    energy to the residual energy is returned in decibels.

    Args:
        reference (np.ndarray): Clean reference signal (1-D array).
        estimate (np.ndarray): Estimated / processed signal (1-D array).
        epsilon (float): Small constant used both as an energy threshold and
            as a denominator stabiliser.

    Returns:
        float: SI-SDR in dB; ``-inf`` when the centred reference energy is
        below ``epsilon`` and ``inf`` when the residual energy is below
        ``epsilon``.
    """
    assert reference.shape == estimate.shape, "reference和estimate的维度不一样"

    # Remove the DC component from both signals.
    ref_centered = reference - np.mean(reference)
    est_centered = estimate - np.mean(estimate)

    # Inner product <estimate, reference> and reference energy ||reference||^2.
    cross = np.dot(est_centered, ref_centered)
    ref_energy = np.dot(ref_centered, ref_centered)

    # A (near-)silent reference makes the measure meaningless.
    if ref_energy < epsilon:
        return -np.inf

    # Projection of the estimate onto the reference.
    scale = cross / (ref_energy + epsilon)
    target = scale * ref_centered

    # Everything in the estimate that the projection does not explain.
    residual = est_centered - target

    target_energy = np.sum(target**2)
    residual_energy = np.sum(residual**2)

    # Negligible residual: the estimate matches the reference up to scale.
    if residual_energy < epsilon:
        return np.inf

    return 10 * np.log10(target_energy / (residual_energy + epsilon))
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
if __name__ == "__main__":
    # Synthetic smoke test: Gaussian "speech" plus Gaussian noise scaled by 0.1.
    speech = np.random.randn(1000)
    noise = np.random.randn(1000) * 0.1  # low-level noise
    noisy = speech + noise

    # Exercise each metric in turn and print its value in dB.
    snr_db = get_snr(speech, noise)
    print(f"SNR: {snr_db:.2f} dB")

    seg_snr_db = seg_snr(speech, noisy, 100, 50)
    print(f"Segmental SNR: {seg_snr_db:.2f} dB")

    psnr_db = psnr(speech, noisy)
    print(f"PSNR: {psnr_db:.2f} dB")
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
import librosa
|
|
2
|
+
import numpy as np
|
|
3
|
+
from neverlib.vad.utils import vad2nad
|
|
4
|
+
from neverlib.filter import HPFilter
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def get_snr(speech, noise, hpf=False, sr=16000, order=6, cutoff=100):
    """Compute the signal-to-noise ratio (SNR) in dB.

    Args:
        speech: Speech signal, numpy array.
        noise: Noise signal, numpy array with the same number of dimensions
            as ``speech``.
        hpf: When True, both signals are passed through ``HPFilter`` before
            the powers are measured.
        sr: Sample rate forwarded to ``HPFilter`` (only used when ``hpf``).
        order: Filter order forwarded to ``HPFilter`` (only used when ``hpf``).
        cutoff: Cutoff forwarded to ``HPFilter`` (only used when ``hpf``).

    Returns:
        snr: ``10 * log10(P_speech / P_noise)``, with the noise power floored
            at 1e-10.

    Raises:
        AssertionError: If ``speech`` and ``noise`` differ in ``ndim``.
    """
    assert speech.ndim == noise.ndim, "speech和noise的维度不一样"
    if hpf:
        # ``sr`` used to be an undefined name here, so hpf=True always raised
        # NameError; it is now a keyword parameter with a default.
        speech = HPFilter(speech, sr=sr, order=order, cutoff=cutoff)
        noise = HPFilter(noise, sr=sr, order=order, cutoff=cutoff)

    # Compute powers in float64: squaring integer PCM (e.g. int16) in its own
    # dtype silently wraps around.
    power_speech = np.mean(np.square(np.asarray(speech, dtype=np.float64)))
    power_noise = np.mean(np.square(np.asarray(noise, dtype=np.float64)))
    power_noise = max(power_noise, 1e-10)  # floor to avoid division by zero

    return 10 * np.log10(power_speech / power_noise)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def get_snr_from_noisy(noisy, speech_vad=None):
    """Estimate the SNR (dB) of a noisy recording from its VAD segmentation.

    Samples inside the VAD segments are treated as speech plus noise; samples
    in the regions returned by ``vad2nad`` are treated as noise only.  The
    clean-speech power is estimated as the difference of the two powers.

    Args:
        noisy: Noisy audio, sliced along its first axis by sample index.
        speech_vad: Speech segments as ``[{'start': int, 'end': int}, ...]``.

    Returns:
        snr: Estimated SNR in dB.

    Raises:
        AssertionError: If ``speech_vad`` is None.
    """
    assert speech_vad is not None, "speech_vad不能为空"

    # The original code referenced a module-level ``EPS`` that is never
    # defined or imported, so every call raised NameError.  Use a local floor
    # matching the 1e-10 floor used by get_snr.
    eps = 1e-10

    # Gather the speech (speech + noise) samples.
    speech = np.concatenate(
        [noisy[segment['start']:segment['end']] for segment in speech_vad],
        axis=0,
    )

    # Gather the non-speech samples.
    # NOTE(review): vad2nad presumably returns the complement of the VAD
    # segments within [0, len(noisy)) -- confirm against neverlib.vad.utils.
    noise_point_list = vad2nad(speech_vad, len(noisy))
    noise = np.concatenate(
        [noisy[point['start']:point['end']] for point in noise_point_list],
        axis=0,
    )

    # Work in float64 so that integer PCM does not overflow when squared.
    P_speech_noise = np.mean(np.square(np.asarray(speech, dtype=np.float64)))  # speech + noise power
    P_noise = max(np.mean(np.square(np.asarray(noise, dtype=np.float64))), eps)  # noise-only power

    # Net speech power, floored so the logarithm stays finite.
    P_speech = max(P_speech_noise - P_noise, eps)

    return 10 * np.log10(P_speech / P_noise)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def seg_snr(clean, noisy, frame_length: int, hop_length: int):
    """Compute the segmental SNR: the mean of the per-frame SNRs in dB.

    Args:
        clean: Clean audio, numpy array.
        noisy: Noisy audio, numpy array with the same shape as ``clean``.
        frame_length: Frame length in samples.
        hop_length: Hop between consecutive frames in samples.

    Returns:
        snr_mean: Mean SNR over all non-silent frames, or ``-inf`` when every
            frame is skipped as silent.

    Raises:
        AssertionError: If ``clean`` and ``noisy`` differ in shape.
    """
    assert clean.shape == noisy.shape, "clean和noisy的维度不一样"

    # Float64 avoids integer wrap-around in the subtraction and squaring below.
    clean = np.asarray(clean, dtype=np.float64)
    noisy = np.asarray(noisy, dtype=np.float64)

    # Split both signals into frames.
    clean_frames = librosa.util.frame(clean, frame_length=frame_length, hop_length=hop_length)  # (frame_length, n_frames)
    noisy_frames = librosa.util.frame(noisy, frame_length=frame_length, hop_length=hop_length)  # (frame_length, n_frames)

    snr_frames = []
    for clean_frame, noisy_frame in zip(clean_frames.T, noisy_frames.T):
        # Skip frames where either signal is silent.
        if np.all(np.abs(clean_frame) < 1e-6) or np.all(np.abs(noisy_frame) < 1e-6):
            continue
        # The noise is the difference between the noisy and the clean frame.
        # Passing the noisy frame itself as "noise" (as before) yields roughly
        # 0 dB regardless of the actual noise level.
        snr_frames.append(get_snr(clean_frame, noisy_frame - clean_frame))

    # Every frame was silent.
    if not snr_frames:
        return float('-inf')

    return np.mean(snr_frames)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def psnr(clean, noisy, max_val=None):
    """Compute the peak signal-to-noise ratio (PSNR) in dB.

    Args:
        clean: Clean audio, numpy array.
        noisy: Noisy audio, numpy array with the same shape as ``clean``.
        max_val: Peak signal value.  When None, the largest absolute value of
            ``clean`` is used.

    Returns:
        psnr: ``10 * log10(max_val**2 / MSE)`` in dB, or ``inf`` when the two
            signals are identical (MSE of zero).

    Raises:
        AssertionError: If ``clean`` and ``noisy`` differ in shape.
    """
    assert clean.shape == noisy.shape, "clean和noisy的维度不一样"

    # Work in float64: with integer PCM (e.g. int16) both the squared error
    # and max_val**2 would wrap around in the integer dtype.
    clean = np.asarray(clean, dtype=np.float64)
    noisy = np.asarray(noisy, dtype=np.float64)

    # Default the peak to the actual maximum magnitude of the clean signal.
    if max_val is None:
        max_val = np.abs(clean).max()
    peak = float(max_val)

    # Mean squared error between the two signals.
    mse = np.mean((clean - noisy) ** 2)

    # Identical signals: avoid dividing by zero.
    if mse == 0:
        return float('inf')

    return 10 * np.log10(peak ** 2 / mse)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def si_sdr(reference, estimate, epsilon=1e-8):
    """Compute the scale-invariant signal-to-distortion ratio (SI-SDR) in dB.

    Both signals are mean-centred, the estimate is projected onto the
    reference to obtain the target component, and the ratio of the target
    energy to the residual energy is returned in decibels.

    Args:
        reference (np.ndarray): Clean reference signal (1-D array).
        estimate (np.ndarray): Estimated / processed signal (1-D array).
        epsilon (float): Small constant used both as an energy threshold and
            as a denominator stabiliser.

    Returns:
        float: SI-SDR in dB; ``-inf`` when the centred reference energy is
        below ``epsilon`` and ``inf`` when the residual energy is below
        ``epsilon``.
    """
    assert reference.shape == estimate.shape, "reference和estimate的维度不一样"

    # Remove the DC component from both signals.
    ref_centered = reference - np.mean(reference)
    est_centered = estimate - np.mean(estimate)

    # Inner product <estimate, reference> and reference energy ||reference||^2.
    cross = np.dot(est_centered, ref_centered)
    ref_energy = np.dot(ref_centered, ref_centered)

    # A (near-)silent reference makes the measure meaningless.
    if ref_energy < epsilon:
        return -np.inf

    # Projection of the estimate onto the reference.
    scale = cross / (ref_energy + epsilon)
    target = scale * ref_centered

    # Everything in the estimate that the projection does not explain.
    residual = est_centered - target

    target_energy = np.sum(target**2)
    residual_energy = np.sum(residual**2)

    # Negligible residual: the estimate matches the reference up to scale.
    if residual_energy < epsilon:
        return np.inf

    return 10 * np.log10(target_energy / (residual_energy + epsilon))
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
if __name__ == "__main__":
    # Synthetic smoke test: Gaussian "speech" plus Gaussian noise scaled by 0.1.
    speech = np.random.randn(1000)
    noise = np.random.randn(1000) * 0.1  # low-level noise
    noisy = speech + noise

    # Exercise each metric in turn and print its value in dB.
    snr_db = get_snr(speech, noise)
    print(f"SNR: {snr_db:.2f} dB")

    seg_snr_db = seg_snr(speech, noisy, 100, 50)
    print(f"Segmental SNR: {seg_snr_db:.2f} dB")

    psnr_db = psnr(speech, noisy)
    print(f"PSNR: {psnr_db:.2f} dB")
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
import librosa
|
|
2
|
+
import numpy as np
|
|
3
|
+
from neverlib.vad.utils import vad2nad
|
|
4
|
+
from neverlib.filter import HPFilter
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def get_snr(speech, noise, hpf=False, sr=16000, order=6, cutoff=100):
    """Compute the signal-to-noise ratio (SNR) in dB.

    Args:
        speech: Speech signal, numpy array.
        noise: Noise signal, numpy array with the same number of dimensions
            as ``speech``.
        hpf: When True, both signals are passed through ``HPFilter`` before
            the powers are measured.
        sr: Sample rate forwarded to ``HPFilter`` (only used when ``hpf``).
        order: Filter order forwarded to ``HPFilter`` (only used when ``hpf``).
        cutoff: Cutoff forwarded to ``HPFilter`` (only used when ``hpf``).

    Returns:
        snr: ``10 * log10(P_speech / P_noise)``, with the noise power floored
            at 1e-10.

    Raises:
        AssertionError: If ``speech`` and ``noise`` differ in ``ndim``.
    """
    assert speech.ndim == noise.ndim, "speech和noise的维度不一样"
    if hpf:
        speech = HPFilter(speech, sr=sr, order=order, cutoff=cutoff)
        noise = HPFilter(noise, sr=sr, order=order, cutoff=cutoff)

    # Compute powers in float64: squaring integer PCM (e.g. int16) in its own
    # dtype silently wraps around and corrupts the power estimate.
    power_speech = np.mean(np.square(np.asarray(speech, dtype=np.float64)))
    power_noise = np.mean(np.square(np.asarray(noise, dtype=np.float64)))
    power_noise = max(power_noise, 1e-10)  # floor to avoid division by zero

    return 10 * np.log10(power_speech / power_noise)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def get_snr_from_noisy(noisy, speech_vad=None):
    """Estimate the SNR (dB) of a noisy recording from its VAD segmentation.

    Samples inside the VAD segments are treated as speech plus noise; samples
    in the regions returned by ``vad2nad`` are treated as noise only.  The
    clean-speech power is estimated as the difference of the two powers.

    Args:
        noisy: Noisy audio, sliced along its first axis by sample index.
        speech_vad: Speech segments as ``[{'start': int, 'end': int}, ...]``.

    Returns:
        snr: Estimated SNR in dB.

    Raises:
        AssertionError: If ``speech_vad`` is None.
    """
    assert speech_vad is not None, "speech_vad不能为空"

    # The original code referenced a module-level ``EPS`` that is never
    # defined or imported, so every call raised NameError.  Use a local floor
    # matching the 1e-10 floor used by get_snr.
    eps = 1e-10

    # Gather the speech (speech + noise) samples.
    speech = np.concatenate(
        [noisy[segment['start']:segment['end']] for segment in speech_vad],
        axis=0,
    )

    # Gather the non-speech samples.
    # NOTE(review): vad2nad presumably returns the complement of the VAD
    # segments within [0, len(noisy)) -- confirm against neverlib.vad.utils.
    noise_point_list = vad2nad(speech_vad, len(noisy))
    noise = np.concatenate(
        [noisy[point['start']:point['end']] for point in noise_point_list],
        axis=0,
    )

    # Work in float64 so that integer PCM does not overflow when squared.
    P_speech_noise = np.mean(np.square(np.asarray(speech, dtype=np.float64)))  # speech + noise power
    P_noise = max(np.mean(np.square(np.asarray(noise, dtype=np.float64))), eps)  # noise-only power

    # Net speech power, floored so the logarithm stays finite.
    P_speech = max(P_speech_noise - P_noise, eps)

    return 10 * np.log10(P_speech / P_noise)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def seg_snr(clean, noisy, frame_length: int, hop_length: int):
    """Compute the segmental (frame-averaged) SNR of a noisy signal.

    Both signals are split into frames with ``librosa.util.frame``. For each
    frame the SNR of the clean frame against the residual ``noisy - clean``
    is computed with ``get_snr``, and the per-frame values are averaged.
    Frames in which either the clean or the noisy signal stays below 1e-6 in
    absolute value are skipped.

    Args:
        clean: Clean reference audio, numpy array.
        noisy: Noisy audio (clean + noise), numpy array of the same shape.
        frame_length: Frame length passed to ``librosa.util.frame``.
        hop_length: Hop length passed to ``librosa.util.frame``.

    Returns:
        Mean per-frame SNR in dB, or ``float('-inf')`` when every frame was
        skipped as silent.

    Raises:
        AssertionError: If ``clean`` and ``noisy`` differ in shape.
    """
    clean = np.asarray(clean)
    noisy = np.asarray(noisy)
    assert clean.shape == noisy.shape, "clean和noisy的维度不一样"

    # Promote integer PCM so the per-frame subtraction below cannot wrap.
    if clean.dtype.kind in "biu":
        clean = clean.astype(np.float64)
    if noisy.dtype.kind in "biu":
        noisy = noisy.astype(np.float64)

    # Framing: each result is (frame_length, n_frames) for 1-D input.
    clean_frames = librosa.util.frame(clean, frame_length=frame_length, hop_length=hop_length)
    noisy_frames = librosa.util.frame(noisy, frame_length=frame_length, hop_length=hop_length)

    snr_frames = []
    for i in range(clean_frames.shape[1]):
        clean_frame = clean_frames[:, i]
        noisy_frame = noisy_frames[:, i]
        # Skip silent frames.
        if np.all(np.abs(clean_frame) < 1e-6) or np.all(np.abs(noisy_frame) < 1e-6):
            continue
        # The noise in a frame is what the noisy signal adds on top of the
        # clean one; passing the noisy frame itself would compare clean
        # power against (clean + noise) power instead of noise power.
        snr_frames.append(get_snr(clean_frame, noisy_frame - clean_frame))

    # Every frame was silent.
    if not snr_frames:
        return float('-inf')

    return np.mean(snr_frames)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def psnr(clean, noisy, max_val=None):
    """Compute the peak signal-to-noise ratio (PSNR) in dB.

    PSNR is ``10 * log10(max_val**2 / MSE)`` where MSE is the mean squared
    difference between ``clean`` and ``noisy``.

    Args:
        clean: Clean reference audio (array-like).
        noisy: Noisy audio (array-like) of the same shape.
        max_val: Peak signal value. When None, the largest absolute value of
            ``clean`` is used.

    Returns:
        PSNR in dB; ``float('inf')`` when the two signals are identical
        (MSE of zero).

    Raises:
        AssertionError: If ``clean`` and ``noisy`` differ in shape.
    """
    clean = np.asarray(clean)
    noisy = np.asarray(noisy)
    assert clean.shape == noisy.shape, "clean和noisy的维度不一样"

    # Integer samples would overflow/wrap in `clean - noisy` and in
    # `max_val**2` (both evaluated in the integer dtype), so promote them to
    # float64. Floating-point input is left untouched.
    if clean.dtype.kind in "biu":
        clean = clean.astype(np.float64)
    if noisy.dtype.kind in "biu":
        noisy = noisy.astype(np.float64)

    if max_val is None:
        # Default peak: the actual maximum magnitude of the clean signal.
        max_val = np.abs(clean).max()
    elif isinstance(max_val, (int, np.integer)):
        # A fixed-width integer peak (e.g. np.int16) would overflow when squared.
        max_val = float(max_val)

    # Mean squared error between the two signals.
    mse = np.mean((clean - noisy) ** 2)

    # Identical signals: avoid dividing by zero.
    if mse == 0:
        return float('inf')

    return 10 * np.log10(max_val ** 2 / mse)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def si_sdr(reference, estimate, epsilon=1e-8):
    """Compute the scale-invariant signal-to-distortion ratio (SI-SDR).

    Both signals are made zero-mean, the estimate is projected onto the
    reference to obtain the target component, and the ratio of target energy
    to residual energy is returned in dB.

    Args:
        reference (np.ndarray): Clean reference signal (1-D array).
        estimate (np.ndarray): Estimated / processed signal (1-D array).
        epsilon (float): Small constant used both as an energy threshold and
            as an additive term in the denominators for numerical stability.

    Returns:
        float: SI-SDR in dB. ``-inf`` when the zero-mean reference energy is
        below ``epsilon``; ``+inf`` when the residual energy is below
        ``epsilon``.

    Raises:
        AssertionError: If ``reference`` and ``estimate`` differ in shape.
    """
    assert reference.shape == estimate.shape, "reference和estimate的维度不一样"

    # Remove the DC offset so only the varying part of each signal counts.
    ref = reference - np.mean(reference)
    est = estimate - np.mean(estimate)

    # Ingredients of the projection of `est` onto `ref`.
    cross_term = np.dot(est, ref)    # <est, ref>
    ref_energy = np.dot(ref, ref)    # ||ref||^2

    # A (near-)silent reference makes the metric meaningless.
    if ref_energy < epsilon:
        return -np.inf

    # Target component: the scaled reference that best matches the estimate.
    scale = cross_term / (ref_energy + epsilon)
    target = scale * ref

    # Whatever the projection does not explain is distortion.
    residual = est - target

    target_energy = np.sum(target**2)
    residual_energy = np.sum(residual**2)

    # Negligible residual: the estimate matches the reference up to scale.
    if residual_energy < epsilon:
        return np.inf

    return 10 * np.log10(target_energy / (residual_energy + epsilon))
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
if __name__ == "__main__":
|
|
178
|
+
# 生成测试信号
|
|
179
|
+
speech = np.random.randn(1000)
|
|
180
|
+
noise = np.random.randn(1000) * 0.1 # 较小的噪声
|
|
181
|
+
noisy = speech + noise
|
|
182
|
+
|
|
183
|
+
# 测试各种信噪比计算方法
|
|
184
|
+
print(f"SNR: {get_snr(speech, noise):.2f} dB")
|
|
185
|
+
print(f"Segmental SNR: {seg_snr(speech, noisy, 100, 50):.2f} dB")
|
|
186
|
+
print(f"PSNR: {psnr(speech, noisy):.2f} dB")
|