neverlib 0.2.3__py3-none-any.whl → 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (207) hide show
  1. neverlib/.history/Docs/audio_aug/test_snr_20250806011311.py +0 -0
  2. neverlib/.history/Docs/audio_aug/test_snr_20250806011331.py +75 -0
  3. neverlib/.history/Docs/audio_aug/test_snr_20250806011342.py +57 -0
  4. neverlib/.history/Docs/audio_aug/test_snr_20250806011352.py +57 -0
  5. neverlib/.history/Docs/audio_aug/test_snr_20250806011403.py +57 -0
  6. neverlib/.history/Docs/audio_aug/test_snr_20250806011413.py +57 -0
  7. neverlib/.history/Docs/audio_aug/test_snr_20250806011435.py +55 -0
  8. neverlib/.history/Docs/vad/1_20250810032405.py +0 -0
  9. neverlib/.history/Docs/vad/1_20250810032417.py +39 -0
  10. neverlib/.history/audio_aug/audio_aug_20250806010451.py +125 -0
  11. neverlib/.history/audio_aug/audio_aug_20250806010750.py +138 -0
  12. neverlib/.history/audio_aug/audio_aug_20250806010759.py +140 -0
  13. neverlib/.history/audio_aug/audio_aug_20250806010803.py +140 -0
  14. neverlib/.history/audio_aug/audio_aug_20250806010809.py +140 -0
  15. neverlib/.history/audio_aug/audio_aug_20250806011108.py +140 -0
  16. neverlib/.history/dataAnalyze/__init___20250806204125.py +14 -0
  17. neverlib/.history/dataAnalyze/__init___20250806204139.py +14 -0
  18. neverlib/.history/dataAnalyze/__init___20250806204159.py +14 -0
  19. neverlib/.history/filter/__init___20250820103351.py +70 -0
  20. neverlib/.history/filter/__init___20250821102348.py +70 -0
  21. neverlib/.history/filter/__init___20250821102405.py +14 -0
  22. neverlib/.history/filter/auto_eq/__init___20250819213121.py +36 -0
  23. neverlib/.history/filter/auto_eq/__init___20250821102241.py +36 -0
  24. neverlib/.history/filter/auto_eq/__init___20250821102259.py +36 -0
  25. neverlib/.history/filter/auto_eq/__init___20250821102307.py +36 -0
  26. neverlib/.history/filter/auto_eq/__init___20250821102310.py +36 -0
  27. neverlib/.history/filter/auto_eq/__init___20250821102318.py +36 -0
  28. neverlib/.history/filter/auto_eq/__init___20250821102507.py +36 -0
  29. neverlib/{filter/AudoEQ/auto_eq_de.py → .history/filter/auto_eq/de_eq_20250820103848.py} +1 -1
  30. neverlib/.history/filter/auto_eq/de_eq_20250821102422.py +360 -0
  31. neverlib/.history/filter/auto_eq/freq_eq_20250820140732.py +75 -0
  32. neverlib/.history/filter/auto_eq/freq_eq_20250820140745.py +75 -0
  33. neverlib/.history/filter/auto_eq/freq_eq_20250820140816.py +75 -0
  34. neverlib/.history/filter/auto_eq/freq_eq_20250820140938.py +77 -0
  35. neverlib/.history/filter/auto_eq/freq_eq_20250820141003.py +77 -0
  36. neverlib/.history/filter/auto_eq/freq_eq_20250820141006.py +77 -0
  37. neverlib/.history/filter/auto_eq/freq_eq_20250820141019.py +77 -0
  38. neverlib/.history/filter/auto_eq/freq_eq_20250820141049.py +77 -0
  39. neverlib/.history/filter/auto_eq/freq_eq_20250820141211.py +77 -0
  40. neverlib/.history/filter/auto_eq/freq_eq_20250820141227.py +77 -0
  41. neverlib/.history/filter/auto_eq/freq_eq_20250820141311.py +78 -0
  42. neverlib/.history/filter/auto_eq/freq_eq_20250820141340.py +78 -0
  43. neverlib/.history/filter/auto_eq/freq_eq_20250820141712.py +78 -0
  44. neverlib/.history/filter/auto_eq/freq_eq_20250820141733.py +78 -0
  45. neverlib/.history/filter/auto_eq/freq_eq_20250820141755.py +78 -0
  46. neverlib/.history/filter/auto_eq/freq_eq_20250821102434.py +76 -0
  47. neverlib/.history/filter/auto_eq/freq_eq_20250821102500.py +76 -0
  48. neverlib/.history/filter/auto_eq/freq_eq_20250821102502.py +76 -0
  49. neverlib/{filter/AudoEQ/auto_eq_ga_basic.py → .history/filter/auto_eq/ga_eq_basic_20250820102957.py} +1 -1
  50. neverlib/.history/filter/auto_eq/ga_eq_basic_20250820113054.py +380 -0
  51. neverlib/.history/filter/auto_eq/ga_eq_basic_20250820113150.py +380 -0
  52. neverlib/.history/filter/auto_eq/ga_eq_basic_20250820113520.py +385 -0
  53. neverlib/.history/filter/auto_eq/ga_eq_basic_20250820113525.py +385 -0
  54. neverlib/.history/filter/auto_eq/ga_eq_basic_20250821102212.py +385 -0
  55. neverlib/.history/metrics/dnsmos_20250806001612.py +160 -0
  56. neverlib/.history/metrics/dnsmos_20250815180659.py +160 -0
  57. neverlib/.history/metrics/dnsmos_20250815180701.py +158 -0
  58. neverlib/.history/metrics/dnsmos_20250815181321.py +154 -0
  59. neverlib/.history/metrics/dnsmos_20250815181327.py +154 -0
  60. neverlib/.history/metrics/dnsmos_20250815181331.py +154 -0
  61. neverlib/.history/metrics/dnsmos_20250815181620.py +154 -0
  62. neverlib/.history/metrics/dnsmos_20250815181631.py +154 -0
  63. neverlib/.history/metrics/dnsmos_20250815181742.py +154 -0
  64. neverlib/.history/metrics/dnsmos_20250815181824.py +153 -0
  65. neverlib/.history/metrics/dnsmos_20250815181834.py +153 -0
  66. neverlib/.history/metrics/dnsmos_20250815181922.py +153 -0
  67. neverlib/.history/metrics/dnsmos_20250815182011.py +147 -0
  68. neverlib/.history/metrics/dnsmos_20250815182036.py +144 -0
  69. neverlib/.history/metrics/dnsmos_20250815182936.py +143 -0
  70. neverlib/.history/metrics/dnsmos_20250815182942.py +143 -0
  71. neverlib/.history/metrics/dnsmos_20250815183032.py +137 -0
  72. neverlib/.history/metrics/dnsmos_20250815183101.py +144 -0
  73. neverlib/.history/metrics/dnsmos_20250815183121.py +144 -0
  74. neverlib/.history/metrics/dnsmos_20250815183123.py +143 -0
  75. neverlib/.history/metrics/dnsmos_20250815183214.py +143 -0
  76. neverlib/.history/metrics/dnsmos_20250815183240.py +143 -0
  77. neverlib/.history/metrics/dnsmos_20250815183248.py +144 -0
  78. neverlib/.history/metrics/dnsmos_20250815183407.py +142 -0
  79. neverlib/.history/metrics/dnsmos_20250815183409.py +142 -0
  80. neverlib/.history/metrics/dnsmos_20250815183431.py +142 -0
  81. neverlib/.history/metrics/dnsmos_20250815183507.py +140 -0
  82. neverlib/.history/metrics/dnsmos_20250815183513.py +139 -0
  83. neverlib/.history/metrics/dnsmos_20250815183618.py +139 -0
  84. neverlib/.history/metrics/dnsmos_20250815183709.py +140 -0
  85. neverlib/.history/metrics/dnsmos_20250815183756.py +137 -0
  86. neverlib/.history/metrics/dnsmos_20250815183815.py +128 -0
  87. neverlib/.history/metrics/dnsmos_20250815183827.py +129 -0
  88. neverlib/.history/metrics/dnsmos_20250815183913.py +117 -0
  89. neverlib/.history/metrics/dnsmos_20250815183914.py +117 -0
  90. neverlib/.history/metrics/dnsmos_20250815184003.py +118 -0
  91. neverlib/.history/metrics/dnsmos_20250815184040.py +118 -0
  92. neverlib/.history/metrics/dnsmos_20250815184049.py +118 -0
  93. neverlib/.history/metrics/dnsmos_20250815184104.py +117 -0
  94. neverlib/.history/metrics/dnsmos_20250815184200.py +117 -0
  95. neverlib/.history/metrics/lpc_lsp_metric_20250816015944.py +128 -0
  96. neverlib/.history/metrics/lpc_lsp_metric_20250816020142.py +128 -0
  97. neverlib/.history/metrics/lpc_lsp_metric_20250816020156.py +128 -0
  98. neverlib/.history/metrics/lpc_lsp_metric_20250816020554.py +130 -0
  99. neverlib/.history/metrics/lpc_lsp_metric_20250816020600.py +125 -0
  100. neverlib/.history/metrics/lpc_lsp_metric_20250816020631.py +120 -0
  101. neverlib/.history/metrics/lpc_lsp_metric_20250816020746.py +118 -0
  102. neverlib/.history/metrics/lpc_me_20250816013111.py +0 -0
  103. neverlib/.history/metrics/lpc_me_20250816013129.py +121 -0
  104. neverlib/.history/metrics/lpc_me_20250816015430.py +103 -0
  105. neverlib/.history/metrics/lpc_me_20250816015535.py +96 -0
  106. neverlib/.history/metrics/lpc_me_20250816015542.py +96 -0
  107. neverlib/.history/metrics/lpc_me_20250816015636.py +97 -0
  108. neverlib/.history/metrics/lpc_me_20250816015658.py +104 -0
  109. neverlib/.history/metrics/lpc_me_20250816015703.py +100 -0
  110. neverlib/.history/metrics/lpc_me_20250816015945.py +128 -0
  111. neverlib/.history/metrics/snr_20250806010538.py +177 -0
  112. neverlib/.history/metrics/snr_20250806211634.py +184 -0
  113. neverlib/.history/metrics/spec_20250805234209.py +45 -0
  114. neverlib/.history/metrics/spec_20250816135530.py +11 -0
  115. neverlib/.history/metrics/spec_20250816135654.py +16 -0
  116. neverlib/.history/metrics/spec_20250816135736.py +68 -0
  117. neverlib/.history/metrics/spec_20250816135904.py +75 -0
  118. neverlib/.history/metrics/spec_20250816135921.py +82 -0
  119. neverlib/.history/metrics/spec_20250816140111.py +82 -0
  120. neverlib/.history/metrics/spec_20250816140543.py +136 -0
  121. neverlib/.history/metrics/spec_20250816140559.py +172 -0
  122. neverlib/.history/metrics/spec_20250816140602.py +172 -0
  123. neverlib/.history/metrics/spec_20250816140608.py +172 -0
  124. neverlib/.history/metrics/spec_20250816140654.py +148 -0
  125. neverlib/.history/metrics/spec_20250816140705.py +144 -0
  126. neverlib/.history/metrics/spec_20250816140755.py +138 -0
  127. neverlib/.history/metrics/spec_20250816140823.py +170 -0
  128. neverlib/.history/metrics/spec_20250816140832.py +170 -0
  129. neverlib/.history/metrics/spec_20250816140833.py +170 -0
  130. neverlib/.history/metrics/spec_20250816140922.py +147 -0
  131. neverlib/.history/metrics/spec_20250816141148.py +107 -0
  132. neverlib/.history/metrics/spec_20250816141219.py +123 -0
  133. neverlib/.history/metrics/spec_20250816141732.py +178 -0
  134. neverlib/.history/metrics/spec_20250816141740.py +178 -0
  135. neverlib/.history/metrics/spec_20250816142030.py +178 -0
  136. neverlib/.history/metrics/spec_20250816142107.py +135 -0
  137. neverlib/.history/metrics/spec_20250816142126.py +135 -0
  138. neverlib/.history/metrics/spec_20250816142410.py +135 -0
  139. neverlib/.history/metrics/spec_20250816142415.py +136 -0
  140. neverlib/.history/metrics/spec_metric_20250816135156.py +0 -0
  141. neverlib/.history/metrics/spec_metric_20250816135226.py +5 -0
  142. neverlib/.history/metrics/spec_metric_20250816135227.py +10 -0
  143. neverlib/.history/metrics/spec_metric_20250816135306.py +15 -0
  144. neverlib/.history/metrics/spec_metric_20250816135442.py +31 -0
  145. neverlib/.history/metrics/spec_metric_20250816135448.py +31 -0
  146. neverlib/.history/metrics/spec_metric_20250816135520.py +29 -0
  147. neverlib/.history/metrics/spec_metric_20250816135537.py +63 -0
  148. neverlib/.history/metrics/spec_metric_20250816135653.py +65 -0
  149. neverlib/.history/vad/PreProcess_20250805234211.py +63 -0
  150. neverlib/.history/vad/PreProcess_20250809232455.py +63 -0
  151. neverlib/.history/vad/PreProcess_20250816020725.py +66 -0
  152. neverlib/.history/vad/VAD_Silero_20250805234211.py +50 -0
  153. neverlib/.history/vad/VAD_Silero_20250809232456.py +50 -0
  154. neverlib/.history/vad/VAD_WebRTC_20250805234211.py +61 -0
  155. neverlib/.history/vad/VAD_WebRTC_20250809232456.py +61 -0
  156. neverlib/.history/vad/VAD_funasr_20250805234211.py +54 -0
  157. neverlib/.history/vad/VAD_funasr_20250809232456.py +54 -0
  158. neverlib/.history/vad/VAD_vadlib_20250805234211.py +70 -0
  159. neverlib/.history/vad/VAD_vadlib_20250809232455.py +70 -0
  160. neverlib/.history/vad/VAD_whisper_20250805234211.py +55 -0
  161. neverlib/.history/vad/VAD_whisper_20250809232456.py +55 -0
  162. neverlib/.specstory/.what-is-this.md +69 -0
  163. neverlib/.specstory/history/2025-08-05_17-06Z-/350/277/231/344/270/200/346/255/245/347/232/204/347/233/256/347/232/204/346/230/257/344/273/200/344/271/210.md +424 -0
  164. neverlib/Docs/audio_aug/test_snr.py +55 -0
  165. neverlib/audio_aug/HarmonicDistortion.py +79 -0
  166. neverlib/audio_aug/TFDrop.py +41 -0
  167. neverlib/audio_aug/TFMask.py +56 -0
  168. neverlib/audio_aug/audio_aug.py +16 -1
  169. neverlib/audio_aug/clip_aug.py +41 -0
  170. neverlib/audio_aug/coder_aug.py +209 -0
  171. neverlib/audio_aug/coder_aug2.py +118 -0
  172. neverlib/audio_aug/loss_packet_aug.py +103 -0
  173. neverlib/audio_aug/quant_aug.py +78 -0
  174. neverlib/data_analyze/__init__.py +14 -0
  175. neverlib/filter/auto_eq/__init__.py +36 -0
  176. neverlib/filter/auto_eq/de_eq.py +360 -0
  177. neverlib/filter/auto_eq/freq_eq.py +76 -0
  178. neverlib/filter/{AudoEQ/auto_eq_ga_advanced.py → auto_eq/ga_eq_advanced.py} +1 -1
  179. neverlib/filter/auto_eq/ga_eq_basic.py +385 -0
  180. neverlib/metrics/dnsmos.py +58 -101
  181. neverlib/metrics/lpc_lsp.py +118 -0
  182. neverlib/metrics/snr.py +11 -4
  183. neverlib/metrics/spec.py +136 -45
  184. neverlib/utils/utils.py +17 -14
  185. neverlib/vad/PreProcess.py +5 -2
  186. neverlib/vad/VAD_Silero.py +1 -1
  187. neverlib/vad/VAD_WebRTC.py +1 -1
  188. neverlib/vad/VAD_funasr.py +1 -1
  189. neverlib/vad/VAD_vadlib.py +1 -1
  190. neverlib/vad/VAD_whisper.py +1 -1
  191. {neverlib-0.2.3.dist-info → neverlib-0.2.4.dist-info}/METADATA +1 -1
  192. neverlib-0.2.4.dist-info/RECORD +229 -0
  193. neverlib-0.2.3.dist-info/RECORD +0 -53
  194. /neverlib/{dataAnalyze/__init__.py → .history/dataAnalyze/__init___20250805234204.py} +0 -0
  195. /neverlib/{filter/AudoEQ/auto_eq_spectral_direct.py → .history/filter/auto_eq/freq_eq_20250805234206.py} +0 -0
  196. /neverlib/{dataAnalyze → data_analyze}/README.md +0 -0
  197. /neverlib/{dataAnalyze → data_analyze}/dataset_analyzer.py +0 -0
  198. /neverlib/{dataAnalyze → data_analyze}/quality_metrics.py +0 -0
  199. /neverlib/{dataAnalyze → data_analyze}/rms_distrubution.py +0 -0
  200. /neverlib/{dataAnalyze → data_analyze}/spectral_analysis.py +0 -0
  201. /neverlib/{dataAnalyze → data_analyze}/statistics.py +0 -0
  202. /neverlib/{dataAnalyze → data_analyze}/temporal_features.py +0 -0
  203. /neverlib/{dataAnalyze → data_analyze}/visualization.py +0 -0
  204. /neverlib/filter/{AudoEQ → auto_eq}/README.md +0 -0
  205. {neverlib-0.2.3.dist-info → neverlib-0.2.4.dist-info}/WHEEL +0 -0
  206. {neverlib-0.2.3.dist-info → neverlib-0.2.4.dist-info}/licenses/LICENSE +0 -0
  207. {neverlib-0.2.3.dist-info → neverlib-0.2.4.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,63 @@
1
+ '''
2
+ Author: 凌逆战 | Never
3
+ Date: 2025-08-16 13:51:57
4
+ Description:
5
+ '''
6
+
7
+ import librosa
8
+ import numpy as np
9
+ import soundfile as sf
10
+
11
def lsd(reference, estimate, n_fft=2048, hop_length=512, win_length=None, epsilon=1e-8):
    """Compute the log-spectral distance (LSD) between two 1-D signals.

    Both signals are transformed with ``librosa.stft``, converted to log-power
    spectra (``10 * log10(|X|^2 + epsilon)``) and compared with a single
    root-mean-square error taken over every time-frequency bin.

    Args:
        reference (np.ndarray): Clean reference signal, 1-D.
        estimate (np.ndarray): Estimated / processed signal, 1-D.
        n_fft (int): FFT size passed to ``librosa.stft``.
        hop_length (int): Hop size passed to ``librosa.stft``.
        win_length (int, optional): Window length; falls back to ``n_fft``
            when ``None``.
        epsilon (float): Small constant added before the logarithm so that
            empty bins do not produce ``-inf``. The original code referenced an
            undefined module constant here, which raised ``NameError``.

    Returns:
        float: RMS difference of the two log-power spectra (dB scale).

    Raises:
        AssertionError: If either input is not one-dimensional.

    Note:
        The two spectrograms are subtracted element-wise, so both signals must
        yield the same number of STFT frames.
    """
    assert reference.ndim == 1 and estimate.ndim == 1, "输入信号必须是一维数组。"

    if win_length is None:
        win_length = n_fft

    reference_stft = librosa.stft(reference, n_fft=n_fft, hop_length=hop_length, win_length=win_length)
    estimate_stft = librosa.stft(estimate, n_fft=n_fft, hop_length=hop_length, win_length=win_length)

    # Power spectra, then log-power in dB; epsilon guards log10(0).
    reference_log_power = 10 * np.log10(np.abs(reference_stft) ** 2 + epsilon)
    estimate_log_power = 10 * np.log10(np.abs(estimate_stft) ** 2 + epsilon)

    squared_error = (reference_log_power - estimate_log_power) ** 2
    return np.sqrt(np.mean(squared_error))
45
+
46
def mcd(ref_wav, test_wav, sr=16000):
    """Compute a Mel-cepstral distance (MCD) between two audio files.

    Args:
        ref_wav: Reference audio, given as anything ``soundfile.read`` accepts
            (typically a file path).
        test_wav: Audio under test, same kind of argument as ``ref_wav``.
        sr (int): Sample rate that both files are required to have.

    Returns:
        float: Distance computed from MFCC coefficients 1..N (the 0th
        coefficient is skipped).

    Raises:
        AssertionError: If either file's sample rate differs from ``sr`` or
            the two files do not contain the same number of samples.
    """
    ref_signal, ref_sr = sf.read(ref_wav)
    test_signal, test_sr = sf.read(test_wav)
    # The message now reports the requested rate instead of a hard-coded 16000.
    assert ref_sr == test_sr == sr, f"采样率必须为{sr}Hz"
    assert len(ref_signal) == len(test_signal), "音频长度必须相同"

    ref_mfcc = librosa.feature.mfcc(y=ref_signal, sr=sr)
    test_mfcc = librosa.feature.mfcc(y=test_signal, sr=sr)

    # Drop the 0th cepstral coefficient before comparing.
    diff = ref_mfcc[1:] - test_mfcc[1:]
    # NOTE(review): this takes the square root of the frame-averaged squared
    # distance; many MCD definitions average the per-frame square roots
    # instead. Kept as in the original -- confirm which variant is intended.
    distance = (10.0 / np.log(10)) * np.sqrt(2 * np.mean(np.sum(diff ** 2, axis=0)))
    return distance
@@ -0,0 +1,65 @@
1
+ '''
2
+ Author: 凌逆战 | Never
3
+ Date: 2025-08-16 13:51:57
4
+ Description:
5
+ '''
6
+
7
+ import librosa
8
+ import numpy as np
9
+ import soundfile as sf
10
+ from utils import EPS
11
+
12
+
13
def lsd(ref_wav, test_wav, n_fft=2048, hop_length=512, win_length=None):
    """Return the log-spectral distance (LSD) between two 1-D signals.

    Each signal is turned into a log-power spectrogram
    (``10 * log10(|STFT|^2 + EPS)``); the result is the root-mean-square of
    the difference taken over all time-frequency bins.

    Args:
        ref_wav (np.ndarray): Clean reference signal, 1-D.
        test_wav (np.ndarray): Estimated / processed signal, 1-D.
        n_fft (int): FFT size passed to ``librosa.stft``.
        hop_length (int): Hop size passed to ``librosa.stft``.
        win_length (int, optional): Window length; ``n_fft`` is used when
            ``None``.

    Returns:
        float: RMS difference of the two log-power spectra (dB scale).

    Raises:
        AssertionError: If either input is not one-dimensional.
    """
    assert ref_wav.ndim == 1 and test_wav.ndim == 1, "输入信号必须是一维数组。"

    frame_len = n_fft if win_length is None else win_length

    def _log_power(signal):
        # STFT -> power -> dB; EPS (module-level constant) guards log10(0).
        spectrum = librosa.stft(signal, n_fft=n_fft, hop_length=hop_length, win_length=frame_len)
        return 10 * np.log10(np.abs(spectrum) ** 2 + EPS)

    delta = _log_power(ref_wav) - _log_power(test_wav)
    return np.sqrt(np.mean(delta ** 2))
47
+
48
def mcd(ref_wav, test_wav, sr=16000):
    """Compute a Mel-cepstral distance (MCD) between two audio files.

    Args:
        ref_wav: Reference audio, given as anything ``soundfile.read`` accepts
            (typically a file path).
        test_wav: Audio under test, same kind of argument as ``ref_wav``.
        sr (int): Sample rate that both files are required to have.

    Returns:
        float: Distance computed from MFCC coefficients 1..N (the 0th
        coefficient is skipped).

    Raises:
        AssertionError: If either file's sample rate differs from ``sr`` or
            the two files do not contain the same number of samples.
    """
    ref_signal, ref_sr = sf.read(ref_wav)
    test_signal, test_sr = sf.read(test_wav)
    # The message now reports the requested rate instead of a hard-coded 16000.
    assert ref_sr == test_sr == sr, f"采样率必须为{sr}Hz"
    assert len(ref_signal) == len(test_signal), "音频长度必须相同"

    ref_mfcc = librosa.feature.mfcc(y=ref_signal, sr=sr)
    test_mfcc = librosa.feature.mfcc(y=test_signal, sr=sr)

    # Drop the 0th cepstral coefficient before comparing.
    diff = ref_mfcc[1:] - test_mfcc[1:]
    # NOTE(review): this takes the square root of the frame-averaged squared
    # distance; many MCD definitions average the per-frame square roots
    # instead. Kept as in the original -- confirm which variant is intended.
    distance = (10.0 / np.log(10)) * np.sqrt(2 * np.mean(np.sum(diff ** 2, axis=0)))
    return distance
@@ -0,0 +1,63 @@
1
+ '''
2
+ Author: 凌逆战 | Never
3
+ Date: 2025-02-13 20:06:07
4
+ LastEditTime: 2025-03-17 16:06:11
5
+ FilePath: \neverlib\vad\PreProcess.py
6
+ Description:
7
+ '''
8
+ # -*- coding:utf-8 -*-
9
+ # Author:凌逆战 | Never
10
+ # Date: 2024/9/14
11
+ """
12
+ 通过一些预处理方法, 来提高VAD的准确率
13
+ """
14
+ import numpy as np
15
+ import noisereduce as nr
16
+
17
+
18
def pre_emphasis(audio_data, alpha=0.97):
    """Apply a first-order pre-emphasis filter: y[n] = x[n] - alpha * x[n-1].

    Args:
        audio_data: Input samples (array-like). The first sample is passed
            through unchanged.
        alpha (float): Pre-emphasis coefficient.

    Returns:
        np.ndarray: Filtered samples, flattened by ``np.append``. Empty input
        yields an empty array instead of raising ``IndexError``.
    """
    samples = np.asarray(audio_data)
    # Slicing with [:1] keeps the first sample but tolerates empty input.
    return np.append(samples[:1], samples[1:] - alpha * samples[:-1])
22
+
23
+
24
def NS(wav, sr=16000, stationary=True, prop_decrease=1.):
    """Classical noise reduction via ``noisereduce.reduce_noise``.

    Doc: https://pypi.org/project/noisereduce/

    :param wav: audio of shape (xxx,) or (channels, xxx)
    :param sr: sample rate
    :param stationary: truthy for stationary noise reduction, falsy for
        non-stationary
    :param prop_decrease: 0~1, proportion of the noise to remove
    :return: the value returned by ``nr.reduce_noise``
    """
    # Both modes share every argument except the boolean flag.
    use_stationary = True if stationary else False
    return nr.reduce_noise(y=wav, sr=sr, stationary=use_stationary,
                           prop_decrease=prop_decrease)
43
+
44
+
45
def NS_test():
    """Manual demo: denoise the example file, save it, and plot both spectrograms."""
    import soundfile as sf

    sr = 16000
    wav, wav_sr = sf.read("../../data/vad_example.wav", always_2d=False, dtype="float32")
    wav_NS = NS(wav, sr=sr, stationary=True, prop_decrease=0.6)
    sf.write("../../wav_data/000_short_NS.wav", wav_NS, samplerate=sr)

    # Spectrograms: original on top, denoised below.
    import matplotlib.pyplot as plt
    for row, signal in enumerate((wav, wav_NS), start=1):
        plt.subplot(2, 1, row)
        plt.specgram(signal, Fs=sr, scale_by_freq=True, sides='default', cmap="jet")
    plt.show()


if __name__ == "__main__":
    NS_test()
@@ -0,0 +1,63 @@
1
+ '''
2
+ Author: 凌逆战 | Never
3
+ Date: 2025-02-13 20:06:07
4
+ LastEditTime: 2025-03-17 16:06:11
5
+ FilePath: \neverlib\vad\PreProcess.py
6
+ Description:
7
+ '''
8
+ # -*- coding:utf-8 -*-
9
+ # Author:凌逆战 | Never
10
+ # Date: 2024/9/14
11
+ """
12
+ 通过一些预处理方法, 来提高VAD的准确率
13
+ """
14
+ import numpy as np
15
+ import noisereduce as nr
16
+
17
+
18
def pre_emphasis(audio_data, alpha=0.97):
    """Apply a first-order pre-emphasis filter: y[n] = x[n] - alpha * x[n-1].

    Args:
        audio_data: Input samples (array-like). The first sample is passed
            through unchanged.
        alpha (float): Pre-emphasis coefficient.

    Returns:
        np.ndarray: Filtered samples, flattened by ``np.append``. Empty input
        yields an empty array instead of raising ``IndexError``.
    """
    samples = np.asarray(audio_data)
    # Slicing with [:1] keeps the first sample but tolerates empty input.
    return np.append(samples[:1], samples[1:] - alpha * samples[:-1])
22
+
23
+
24
def NS(wav, sr=16000, stationary=True, prop_decrease=1.):
    """Classical noise reduction via ``noisereduce.reduce_noise``.

    Doc: https://pypi.org/project/noisereduce/

    :param wav: audio of shape (xxx,) or (channels, xxx)
    :param sr: sample rate
    :param stationary: truthy for stationary noise reduction, falsy for
        non-stationary
    :param prop_decrease: 0~1, proportion of the noise to remove
    :return: the value returned by ``nr.reduce_noise``
    """
    # Both modes share every argument except the boolean flag.
    use_stationary = True if stationary else False
    return nr.reduce_noise(y=wav, sr=sr, stationary=use_stationary,
                           prop_decrease=prop_decrease)
43
+
44
+
45
def NS_test():
    """Manual demo: denoise the example file, save it, and plot both spectrograms."""
    import soundfile as sf

    sr = 16000
    wav, wav_sr = sf.read("../../data/vad_example.wav", always_2d=False, dtype="float32")
    wav_NS = NS(wav, sr=sr, stationary=True, prop_decrease=0.6)
    sf.write("../../wav_data/000_short_NS.wav", wav_NS, samplerate=sr)

    # Spectrograms: original on top, denoised below.
    import matplotlib.pyplot as plt
    for row, signal in enumerate((wav, wav_NS), start=1):
        plt.subplot(2, 1, row)
        plt.specgram(signal, Fs=sr, scale_by_freq=True, sides='default', cmap="jet")
    plt.show()


if __name__ == "__main__":
    NS_test()
@@ -0,0 +1,66 @@
1
+ '''
2
+ Author: 凌逆战 | Never
3
+ Date: 2025-02-13 20:06:07
4
+ LastEditTime: 2025-08-16 02:07:24
5
+ FilePath: \neverlib\vad\PreProcess.py
6
+ Description:
7
+ '''
8
+ # -*- coding:utf-8 -*-
9
+ # Author:凌逆战 | Never
10
+ # Date: 2024/9/14
11
+ """
12
+ 通过一些预处理方法, 来提高VAD的准确率
13
+ """
14
+ import numpy as np
15
+ import noisereduce as nr
16
+
17
+
18
def pre_emphasis(audio_data, alpha=0.97):
    """Apply a first-order pre-emphasis filter: y[n] = x[n] - alpha * x[n-1].

    Args:
        audio_data: Input samples (array-like). The first sample is passed
            through unchanged.
        alpha (float): Pre-emphasis coefficient.

    Returns:
        np.ndarray: Filtered samples, flattened by ``np.append``. Empty input
        yields an empty array instead of raising ``IndexError``.
    """
    samples = np.asarray(audio_data)
    # Slicing with [:1] keeps the first sample but tolerates empty input.
    return np.append(samples[:1], samples[1:] - alpha * samples[:-1])
25
+
26
+
27
def NS(wav, sr=16000, stationary=True, prop_decrease=1.):
    """Classical noise reduction via ``noisereduce.reduce_noise``.

    Doc: https://pypi.org/project/noisereduce/

    :param wav: audio of shape (xxx,) or (channels, xxx)
    :param sr: sample rate
    :param stationary: truthy for stationary noise reduction, falsy for
        non-stationary
    :param prop_decrease: 0~1, proportion of the noise to remove
    :return: the value returned by ``nr.reduce_noise``
    """
    # Both modes share every argument except the boolean flag.
    use_stationary = True if stationary else False
    return nr.reduce_noise(y=wav, sr=sr, stationary=use_stationary,
                           prop_decrease=prop_decrease)
46
+
47
+
48
def NS_test():
    """Manual demo: denoise the example file, save it, and plot both spectrograms."""
    import soundfile as sf

    sr = 16000
    wav, wav_sr = sf.read("../../data/vad_example.wav", always_2d=False, dtype="float32")
    wav_NS = NS(wav, sr=sr, stationary=True, prop_decrease=0.6)
    sf.write("../../wav_data/000_short_NS.wav", wav_NS, samplerate=sr)

    # Spectrograms: original on top, denoised below.
    import matplotlib.pyplot as plt
    for row, signal in enumerate((wav, wav_NS), start=1):
        plt.subplot(2, 1, row)
        plt.specgram(signal, Fs=sr, scale_by_freq=True, sides='default', cmap="jet")
    plt.show()


if __name__ == "__main__":
    NS_test()
@@ -0,0 +1,50 @@
1
+ # -*- coding:utf-8 -*-
2
+ # Author:凌逆战 | Never
3
+ # Date: 2024/9/19
4
+ """
5
+
6
+ """
7
+ import torch
8
+
9
+
10
class Silero_VAD_C():
    """Thin wrapper around the Silero VAD model loaded through ``torch.hub``."""

    def __init__(self, sr=16000, threshold=0.5, min_speech_duration_ms=10,
                 min_silence_duration_ms=140, window_size_samples=512, speech_pad_ms=0):
        """Store the detection settings and load the ONNX Silero model.

        :param sr: sample rate forwarded as ``sampling_rate``
        :param threshold: detection threshold forwarded to the model helper
        :param min_speech_duration_ms: minimum duration of a speech chunk (ms)
        :param min_silence_duration_ms: minimum silence between speech chunks (ms)
        :param window_size_samples: analysis window in samples (512/1024/1536)
        :param speech_pad_ms: padding added on both sides of each final chunk (ms)
        """
        self.sr = sr
        self.threshold = threshold
        self.min_speech_duration_ms = min_speech_duration_ms
        self.min_silence_duration_ms = min_silence_duration_ms
        self.window_size_samples = window_size_samples
        self.speech_pad_ms = speech_pad_ms

        hub_result = torch.hub.load(repo_or_dir='snakers4/silero-vad', model='silero_vad',
                                    force_reload=False, onnx=True)
        self.model, helpers = hub_result
        # Only the timestamp helper is kept; the other four helpers are unused.
        self.get_speech_timestamps, _save_audio, _read_audio, _vad_iterator, _collect_chunks = helpers

    def process(self, wav):
        """Run the detector on a 1-D signal and return the helper's result.

        :param wav: 1-D audio signal
        :return: whatever ``get_speech_timestamps`` returns for ``wav``
        """
        assert wav.ndim == 1, f"wav shape为{wav.shape}, 期望1D"
        settings = dict(
            sampling_rate=self.sr,
            threshold=self.threshold,
            min_speech_duration_ms=self.min_speech_duration_ms,
            min_silence_duration_ms=self.min_silence_duration_ms,
            window_size_samples=self.window_size_samples,
            speech_pad_ms=self.speech_pad_ms,
        )
        return self.get_speech_timestamps(wav, self.model, **settings)
35
+
36
+
37
if __name__ == "__main__":
    # Demo: load the example file, pre-process it and print the detected speech.
    import soundfile as sf
    # NOTE(review): confirm HPFilter / volume_norm are really exported by
    # neverlib.vad.PreProcess; they may live in another module.
    from neverlib.vad.PreProcess import HPFilter, volume_norm

    sr = 16000
    wav, wav_sr = sf.read("../../data/vad_example.wav", always_2d=False, dtype="float32")
    assert wav_sr == sr, f"音频采样率为{wav_sr}, 期望{sr}"

    # High-pass filter first, then normalise the volume.
    wav = volume_norm(HPFilter(wav, sr=sr, order=6, cutoff=100))

    print(Silero_VAD_C().process(wav))
@@ -0,0 +1,50 @@
1
+ # -*- coding:utf-8 -*-
2
+ # Author:凌逆战 | Never
3
+ # Date: 2024/9/19
4
+ """
5
+
6
+ """
7
+ import torch
8
+
9
+
10
class Silero_VAD_C():
    """Thin wrapper around the Silero VAD model loaded through ``torch.hub``."""

    def __init__(self, sr=16000, threshold=0.5, min_speech_duration_ms=10,
                 min_silence_duration_ms=140, window_size_samples=512, speech_pad_ms=0):
        """Store the detection settings and load the ONNX Silero model.

        :param sr: sample rate forwarded as ``sampling_rate``
        :param threshold: detection threshold forwarded to the model helper
        :param min_speech_duration_ms: minimum duration of a speech chunk (ms)
        :param min_silence_duration_ms: minimum silence between speech chunks (ms)
        :param window_size_samples: analysis window in samples (512/1024/1536)
        :param speech_pad_ms: padding added on both sides of each final chunk (ms)
        """
        self.sr = sr
        self.threshold = threshold
        self.min_speech_duration_ms = min_speech_duration_ms
        self.min_silence_duration_ms = min_silence_duration_ms
        self.window_size_samples = window_size_samples
        self.speech_pad_ms = speech_pad_ms

        hub_result = torch.hub.load(repo_or_dir='snakers4/silero-vad', model='silero_vad',
                                    force_reload=False, onnx=True)
        self.model, helpers = hub_result
        # Only the timestamp helper is kept; the other four helpers are unused.
        self.get_speech_timestamps, _save_audio, _read_audio, _vad_iterator, _collect_chunks = helpers

    def process(self, wav):
        """Run the detector on a 1-D signal and return the helper's result.

        :param wav: 1-D audio signal
        :return: whatever ``get_speech_timestamps`` returns for ``wav``
        """
        assert wav.ndim == 1, f"wav shape为{wav.shape}, 期望1D"
        settings = dict(
            sampling_rate=self.sr,
            threshold=self.threshold,
            min_speech_duration_ms=self.min_speech_duration_ms,
            min_silence_duration_ms=self.min_silence_duration_ms,
            window_size_samples=self.window_size_samples,
            speech_pad_ms=self.speech_pad_ms,
        )
        return self.get_speech_timestamps(wav, self.model, **settings)
35
+
36
+
37
if __name__ == "__main__":
    # Demo: load the example file, pre-process it and print the detected speech.
    import soundfile as sf
    # NOTE(review): confirm HPFilter / volume_norm are really exported by
    # neverlib.vad.PreProcess; they may live in another module.
    from neverlib.vad.PreProcess import HPFilter, volume_norm

    sr = 16000
    wav, wav_sr = sf.read("../../data/vad_example.wav", always_2d=False, dtype="float32")
    assert wav_sr == sr, f"音频采样率为{wav_sr}, 期望{sr}"

    # High-pass filter first, then normalise the volume.
    wav = volume_norm(HPFilter(wav, sr=sr, order=6, cutoff=100))

    print(Silero_VAD_C().process(wav))
@@ -0,0 +1,61 @@
1
+ # -*- coding:utf-8 -*-
2
+ # Author:凌逆战 | Never
3
+ # Date: 2024/9/19
4
+ """
5
+
6
+ """
7
+ import numpy as np
8
+
9
+
10
class WebRTC_VAD_C():
    """Frame-wise voice activity detector built on ``webrtcvad``."""

    def __init__(self, sr=16000, window_len=10, mode=1):
        """Create the detector.

        :param sr: sample rate of the audio passed to ``process``
        :param window_len: frame length in milliseconds
        :param mode: value passed to ``webrtcvad.Vad.set_mode`` (0~3)
        """
        import webrtcvad
        self.sr = sr
        self.vad = webrtcvad.Vad()
        self.vad.set_mode(mode)  # 0~3
        self.window_len = int(window_len / 1000 * sr)  # frame length in samples

    def process(self, wav):
        """Label every sample of ``wav`` as speech (1) or non-speech (0).

        :param wav: 1-D float signal, nominally in [-1, 1]
        :return: int16 array of 0/1 flags; its length is ``len(wav)`` rounded
            down to a whole number of frames
        """
        assert wav.ndim == 1, f"wav shape为{wav.shape}, 期望1D"
        # float -> int16. Clip first: samples outside [-1, 1] would otherwise
        # overflow the int16 range during the cast.
        int16_max = np.iinfo(np.int16).max
        wav_int16 = (np.clip(wav, -1.0, 1.0) * int16_max).astype(np.int16)
        # Drop the trailing partial frame so every frame has window_len samples.
        usable = len(wav_int16) - len(wav_int16) % self.window_len
        wav_int16 = wav_int16[:usable]
        vad_array = np.zeros_like(wav_int16)
        for start in range(0, usable, self.window_len):
            stop = start + self.window_len
            vad_array[start:stop] = self.vad.is_speech(wav_int16[start:stop].tobytes(), self.sr)

        return vad_array
33
+
34
+
35
if __name__ == "__main__":
    # Demo: run the detector on the example file and plot signal vs. VAD flags.
    import soundfile as sf
    import matplotlib.pyplot as plt
    # NOTE(review): confirm HPFilter / volume_norm are really exported by
    # neverlib.vad.PreProcess; they may live in another module.
    from neverlib.vad.PreProcess import HPFilter, volume_norm

    sr = 16000
    wav, wav_sr = sf.read("../../data/vad_example.wav", always_2d=False, dtype="float32")
    assert wav_sr == sr, f"音频采样率为{wav_sr}, 期望{sr}"

    # High-pass filter first, then normalise the volume.
    wav = volume_norm(HPFilter(wav, sr=sr, order=6, cutoff=100))

    vad_array = WebRTC_VAD_C().process(wav)

    # Waveform and VAD decision on the same axes.
    plt.figure(figsize=(20, 5))
    for trace in (wav, vad_array):
        plt.plot(trace)
    plt.grid()
    plt.show()

    # Spectrograms: signal on top, VAD decision below.
    plt.figure(figsize=(20, 5))
    for row, trace in enumerate((wav, vad_array), start=1):
        plt.subplot(2, 1, row)
        plt.specgram(trace, Fs=sr, scale_by_freq=True, sides='default', cmap="jet")
    plt.show()
@@ -0,0 +1,61 @@
1
+ # -*- coding:utf-8 -*-
2
+ # Author:凌逆战 | Never
3
+ # Date: 2024/9/19
4
+ """
5
+
6
+ """
7
+ import numpy as np
8
+
9
+
10
class WebRTC_VAD_C():
    """Frame-wise voice activity detector built on ``webrtcvad``."""

    def __init__(self, sr=16000, window_len=10, mode=1):
        """Create the detector.

        :param sr: sample rate of the audio passed to ``process``
        :param window_len: frame length in milliseconds
        :param mode: value passed to ``webrtcvad.Vad.set_mode`` (0~3)
        """
        import webrtcvad
        self.sr = sr
        self.vad = webrtcvad.Vad()
        self.vad.set_mode(mode)  # 0~3
        self.window_len = int(window_len / 1000 * sr)  # frame length in samples

    def process(self, wav):
        """Label every sample of ``wav`` as speech (1) or non-speech (0).

        :param wav: 1-D float signal, nominally in [-1, 1]
        :return: int16 array of 0/1 flags; its length is ``len(wav)`` rounded
            down to a whole number of frames
        """
        assert wav.ndim == 1, f"wav shape为{wav.shape}, 期望1D"
        # float -> int16. Clip first: samples outside [-1, 1] would otherwise
        # overflow the int16 range during the cast.
        int16_max = np.iinfo(np.int16).max
        wav_int16 = (np.clip(wav, -1.0, 1.0) * int16_max).astype(np.int16)
        # Drop the trailing partial frame so every frame has window_len samples.
        usable = len(wav_int16) - len(wav_int16) % self.window_len
        wav_int16 = wav_int16[:usable]
        vad_array = np.zeros_like(wav_int16)
        for start in range(0, usable, self.window_len):
            stop = start + self.window_len
            vad_array[start:stop] = self.vad.is_speech(wav_int16[start:stop].tobytes(), self.sr)

        return vad_array
33
+
34
+
35
if __name__ == "__main__":
    # Demo: run the detector on the example file and plot signal vs. VAD flags.
    import soundfile as sf
    import matplotlib.pyplot as plt
    # NOTE(review): confirm HPFilter / volume_norm are really exported by
    # neverlib.vad.PreProcess; they may live in another module.
    from neverlib.vad.PreProcess import HPFilter, volume_norm

    sr = 16000
    wav, wav_sr = sf.read("../../data/vad_example.wav", always_2d=False, dtype="float32")
    assert wav_sr == sr, f"音频采样率为{wav_sr}, 期望{sr}"

    # High-pass filter first, then normalise the volume.
    wav = volume_norm(HPFilter(wav, sr=sr, order=6, cutoff=100))

    vad_array = WebRTC_VAD_C().process(wav)

    # Waveform and VAD decision on the same axes.
    plt.figure(figsize=(20, 5))
    for trace in (wav, vad_array):
        plt.plot(trace)
    plt.grid()
    plt.show()

    # Spectrograms: signal on top, VAD decision below.
    plt.figure(figsize=(20, 5))
    for row, trace in enumerate((wav, vad_array), start=1):
        plt.subplot(2, 1, row)
        plt.specgram(trace, Fs=sr, scale_by_freq=True, sides='default', cmap="jet")
    plt.show()
@@ -0,0 +1,54 @@
1
+ # -*- coding:utf-8 -*-
2
+ # Author:凌逆战 | Never
3
+ # Date: 2024/9/19
4
+ """
5
+
6
+ """
7
+ import numpy as np
8
+
9
+
10
class FunASR_VAD_C():
    """Voice activity detector backed by the FunASR ``fsmn-vad`` model."""

    def __init__(self, sr=16000):
        """Load the model.

        :param sr: sample rate used to convert millisecond marks to samples
        """
        from funasr import AutoModel
        self.sr = sr
        self.model = AutoModel(model="fsmn-vad", model_revision="v2.0.4")

    def process(self, wav):
        """Return a per-sample mask with 1 inside every detected segment.

        :param wav: 1-D audio signal
        :return: array shaped and typed like ``wav``, 1 inside segments, 0 elsewhere
        """
        assert wav.ndim == 1, f"wav shape为{wav.shape}, 期望1D"
        detections = self.model.generate(input=wav)
        mask = np.zeros_like(wav)
        # Each result carries [begin, end] pairs, converted here from
        # milliseconds to sample indices.
        segments = (pair for result in detections for pair in result["value"])
        for beg, end in segments:
            first = int(beg * self.sr / 1000)
            last = int(end * self.sr / 1000)
            mask[first:last] = 1

        return mask
26
+
27
+
28
if __name__ == "__main__":
    # Demo: run the detector on the example file and plot signal vs. VAD mask.
    import soundfile as sf
    import matplotlib.pyplot as plt
    # NOTE(review): confirm HPFilter / volume_norm are really exported by
    # neverlib.vad.PreProcess; they may live in another module.
    from neverlib.vad.PreProcess import HPFilter, volume_norm

    sr = 16000
    wav, wav_sr = sf.read("../../data/vad_example.wav", always_2d=False, dtype="float32")
    assert wav_sr == sr, f"音频采样率为{wav_sr}, 期望{sr}"

    # High-pass filter first, then normalise the volume.
    wav = volume_norm(HPFilter(wav, sr=sr, order=6, cutoff=100))

    vad_array = FunASR_VAD_C().process(wav)

    # Waveform and VAD mask on the same axes.
    plt.figure(figsize=(20, 5))
    for trace in (wav, vad_array):
        plt.plot(trace)
    plt.grid()
    plt.show()

    # Spectrograms: signal on top, VAD mask below.
    plt.figure(figsize=(20, 5))
    for row, trace in enumerate((wav, vad_array), start=1):
        plt.subplot(2, 1, row)
        plt.specgram(trace, Fs=sr, scale_by_freq=True, sides='default', cmap="jet")
    plt.show()
@@ -0,0 +1,54 @@
1
+ # -*- coding:utf-8 -*-
2
+ # Author:凌逆战 | Never
3
+ # Date: 2024/9/19
4
+ """
5
+
6
+ """
7
+ import numpy as np
8
+
9
+
10
class FunASR_VAD_C():
    """Voice activity detector backed by the FunASR ``fsmn-vad`` model."""

    def __init__(self, sr=16000):
        """Load the model.

        :param sr: sample rate used to convert millisecond marks to samples
        """
        from funasr import AutoModel
        self.sr = sr
        self.model = AutoModel(model="fsmn-vad", model_revision="v2.0.4")

    def process(self, wav):
        """Return a per-sample mask with 1 inside every detected segment.

        :param wav: 1-D audio signal
        :return: array shaped and typed like ``wav``, 1 inside segments, 0 elsewhere
        """
        assert wav.ndim == 1, f"wav shape为{wav.shape}, 期望1D"
        detections = self.model.generate(input=wav)
        mask = np.zeros_like(wav)
        # Each result carries [begin, end] pairs, converted here from
        # milliseconds to sample indices.
        segments = (pair for result in detections for pair in result["value"])
        for beg, end in segments:
            first = int(beg * self.sr / 1000)
            last = int(end * self.sr / 1000)
            mask[first:last] = 1

        return mask
26
+
27
+
28
if __name__ == "__main__":
    # Demo: run the detector on the example file and plot signal vs. VAD mask.
    import soundfile as sf
    import matplotlib.pyplot as plt
    # NOTE(review): confirm HPFilter / volume_norm are really exported by
    # neverlib.vad.PreProcess; they may live in another module.
    from neverlib.vad.PreProcess import HPFilter, volume_norm

    sr = 16000
    wav, wav_sr = sf.read("../../data/vad_example.wav", always_2d=False, dtype="float32")
    assert wav_sr == sr, f"音频采样率为{wav_sr}, 期望{sr}"

    # High-pass filter first, then normalise the volume.
    wav = volume_norm(HPFilter(wav, sr=sr, order=6, cutoff=100))

    vad_array = FunASR_VAD_C().process(wav)

    # Waveform and VAD mask on the same axes.
    plt.figure(figsize=(20, 5))
    for trace in (wav, vad_array):
        plt.plot(trace)
    plt.grid()
    plt.show()

    # Spectrograms: signal on top, VAD mask below.
    plt.figure(figsize=(20, 5))
    for row, trace in enumerate((wav, vad_array), start=1):
        plt.subplot(2, 1, row)
        plt.specgram(trace, Fs=sr, scale_by_freq=True, sides='default', cmap="jet")
    plt.show()