neverlib 0.2.4__py3-none-any.whl → 0.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (260) hide show
  1. neverlib/.history/Docs/audio_aug/del_20250827162530.py +0 -0
  2. neverlib/.history/Docs/audio_aug/del_20250827162540.py +2 -0
  3. neverlib/.history/Docs/audio_aug/del_20250827162541.py +7 -0
  4. neverlib/.history/Docs/audio_aug/del_20250827162606.py +7 -0
  5. neverlib/.history/Docs/audio_aug/del_20250827162637.py +8 -0
  6. neverlib/.history/Docs/audio_aug/del_20250827162645.py +8 -0
  7. neverlib/.history/Docs/audio_aug/del_20250827162723.py +9 -0
  8. neverlib/.history/Docs/audio_aug/del_20250827162739.py +9 -0
  9. neverlib/.history/Docs/audio_aug/test_snr_20250827161751.py +55 -0
  10. neverlib/.history/Docs/audio_aug/test_snr_20250827161754.py +55 -0
  11. neverlib/.history/Docs/audio_aug/test_snr_20250827161833.py +54 -0
  12. neverlib/.history/Docs/audio_aug/test_snr_20250827162017.py +56 -0
  13. neverlib/.history/Docs/audio_aug/test_snr_20250827162021.py +57 -0
  14. neverlib/.history/Docs/audio_aug/test_snr_20250827162028.py +57 -0
  15. neverlib/.history/Docs/audio_aug_test/del_20250827162738.py +9 -0
  16. neverlib/.history/Docs/audio_aug_test/del_20250827162819.py +9 -0
  17. neverlib/.history/Docs/audio_aug_test/del_20250827162830.py +9 -0
  18. neverlib/.history/Docs/audio_aug_test/del_20250827162846.py +9 -0
  19. neverlib/.history/Docs/audio_aug_test/del_20250827162851.py +9 -0
  20. neverlib/.history/Docs/audio_aug_test/del_20250827162903.py +10 -0
  21. neverlib/.history/Docs/audio_aug_test/del_20250827162921.py +10 -0
  22. neverlib/.history/Docs/audio_aug_test/del_20250827162926.py +10 -0
  23. neverlib/.history/Docs/audio_aug_test/del_20250827163030.py +10 -0
  24. neverlib/.history/Docs/audio_aug_test/del_20250827163032.py +10 -0
  25. neverlib/.history/QA/html2markdown_20250822234112.md +0 -0
  26. neverlib/.history/QA/html2markdown_20250822234140.py +9 -0
  27. neverlib/.history/QA/html2markdown_20250822234141.md +9 -0
  28. neverlib/.history/QA/html2markdown_20250822234159.py +12 -0
  29. neverlib/.history/QA/html2markdown_20250822234200.py +17 -0
  30. neverlib/.history/QA/html2markdown_20250822234236.py +17 -0
  31. neverlib/.history/QA/html2markdown_20250822234340.py +14 -0
  32. neverlib/.history/QA/html2markdown_20250822234522.py +18 -0
  33. neverlib/.history/QA/html2markdown_20250822234601.py +20 -0
  34. neverlib/.history/QA/html2markdown_20250822234615.py +22 -0
  35. neverlib/.history/QA/html2markdown_20250822234715.py +28 -0
  36. neverlib/.history/QA/html2markdown_20250822234720.py +27 -0
  37. neverlib/.history/QA/html2markdown_20250822234903.py +27 -0
  38. neverlib/.history/__init___20250805234212.py +41 -0
  39. neverlib/.history/__init___20250904102635.py +39 -0
  40. neverlib/.history/__init___20250904102836.py +34 -0
  41. neverlib/.history/__init___20250904102838.py +39 -0
  42. neverlib/.history/__init___20250904102851.py +33 -0
  43. neverlib/.history/audio_aug/audio_aug_20250826155913.py +158 -0
  44. neverlib/.history/audio_aug/audio_aug_20250826164159.py +159 -0
  45. neverlib/.history/audio_aug/audio_aug_20250826164217.py +160 -0
  46. neverlib/.history/audio_aug/audio_aug_20250826164408.py +161 -0
  47. neverlib/.history/audio_aug/audio_aug_20250826164423.py +161 -0
  48. neverlib/.history/audio_aug/audio_aug_20250826164529.py +161 -0
  49. neverlib/.history/audio_aug/audio_aug_20250826164824.py +161 -0
  50. neverlib/.history/audio_aug/audio_aug_20250826164932.py +162 -0
  51. neverlib/.history/audio_aug/audio_aug_20250826164947.py +162 -0
  52. neverlib/.history/audio_aug/audio_aug_20250826165403.py +162 -0
  53. neverlib/.history/audio_aug/audio_aug_20250826165421.py +162 -0
  54. neverlib/.history/audio_aug/audio_aug_20250826165509.py +163 -0
  55. neverlib/.history/audio_aug/audio_aug_20250826165702.py +163 -0
  56. neverlib/.history/audio_aug/audio_aug_20250826165732.py +165 -0
  57. neverlib/.history/audio_aug/audio_aug_20250826170041.py +163 -0
  58. neverlib/.history/audio_aug/audio_aug_20250826170105.py +164 -0
  59. neverlib/.history/audio_aug/audio_aug_20250826170154.py +164 -0
  60. neverlib/.history/audio_aug/audio_aug_20250826170220.py +165 -0
  61. neverlib/.history/audio_aug/audio_aug_20250826170221.py +165 -0
  62. neverlib/.history/audio_aug/audio_aug_20250826170228.py +165 -0
  63. neverlib/.history/audio_aug/audio_aug_20250826170231.py +165 -0
  64. neverlib/.history/audio_aug/audio_aug_20250826212001.py +165 -0
  65. neverlib/.history/audio_aug/audio_aug_20250826220038.py +165 -0
  66. neverlib/.history/audio_aug/audio_aug_20250826220133.py +165 -0
  67. neverlib/.history/audio_aug/audio_aug_20250826220148.py +165 -0
  68. neverlib/.history/audio_aug/audio_aug_20250826220154.py +165 -0
  69. neverlib/.history/audio_aug/audio_aug_20250826220156.py +165 -0
  70. neverlib/.history/audio_aug/audio_aug_20250826220314.py +165 -0
  71. neverlib/.history/audio_aug/audio_aug_20250826220343.py +184 -0
  72. neverlib/.history/audio_aug/audio_aug_20250826220345.py +184 -0
  73. neverlib/.history/audio_aug/audio_aug_20250826220349.py +184 -0
  74. neverlib/.history/audio_aug/audio_aug_20250826220429.py +184 -0
  75. neverlib/.history/audio_aug/audio_aug_20250826220447.py +184 -0
  76. neverlib/.history/audio_aug/audio_aug_20250826220601.py +186 -0
  77. neverlib/.history/audio_aug/audio_aug_20250826220638.py +186 -0
  78. neverlib/.history/audio_aug/audio_aug_20250826220641.py +186 -0
  79. neverlib/.history/audio_aug/audio_aug_20250826220647.py +186 -0
  80. neverlib/.history/audio_aug/audio_aug_20250826220653.py +186 -0
  81. neverlib/.history/audio_aug/audio_aug_20250826220655.py +186 -0
  82. neverlib/.history/audio_aug/audio_aug_20250826220731.py +185 -0
  83. neverlib/.history/audio_aug/audio_aug_20250826220739.py +185 -0
  84. neverlib/.history/audio_aug/audio_aug_20250826220747.py +185 -0
  85. neverlib/.history/audio_aug/audio_aug_20250826220801.py +186 -0
  86. neverlib/.history/audio_aug/audio_aug_20250826220822.py +186 -0
  87. neverlib/.history/audio_aug/audio_aug_20250826220901.py +186 -0
  88. neverlib/.history/audio_aug/audio_aug_20250826221107.py +187 -0
  89. neverlib/.history/audio_aug/audio_aug_20250826221310.py +188 -0
  90. neverlib/.history/audio_aug/audio_aug_20250826221353.py +191 -0
  91. neverlib/.history/audio_aug/audio_aug_20250826221821.py +191 -0
  92. neverlib/.history/audio_aug/audio_aug_20250826221838.py +191 -0
  93. neverlib/.history/audio_aug/audio_aug_20250826221906.py +191 -0
  94. neverlib/.history/audio_aug/audio_aug_20250826221930.py +191 -0
  95. neverlib/.history/audio_aug/audio_aug_20250826221939.py +191 -0
  96. neverlib/.history/audio_aug/audio_aug_20250826221955.py +191 -0
  97. neverlib/.history/audio_aug/audio_aug_20250826222008.py +197 -0
  98. neverlib/.history/audio_aug/audio_aug_20250826222017.py +200 -0
  99. neverlib/.history/audio_aug/audio_aug_20250826222046.py +203 -0
  100. neverlib/.history/audio_aug/audio_aug_20250826222105.py +203 -0
  101. neverlib/.history/audio_aug/audio_aug_20250826222206.py +203 -0
  102. neverlib/.history/audio_aug/audio_aug_20250826222302.py +203 -0
  103. neverlib/.history/audio_aug/audio_aug_20250826222336.py +203 -0
  104. neverlib/.history/audio_aug/audio_aug_20250826222455.py +204 -0
  105. neverlib/.history/audio_aug/audio_aug_20250826222526.py +204 -0
  106. neverlib/.history/audio_aug/audio_aug_20250826222541.py +204 -0
  107. neverlib/.history/audio_aug/audio_aug_20250826222624.py +202 -0
  108. neverlib/.history/audio_aug/audio_aug_20250826222714.py +205 -0
  109. neverlib/.history/audio_aug/audio_aug_20250826222820.py +205 -0
  110. neverlib/.history/audio_aug/audio_aug_20250826222827.py +205 -0
  111. neverlib/.history/audio_aug/audio_aug_20250826222927.py +232 -0
  112. neverlib/.history/audio_aug/audio_aug_20250826223009.py +232 -0
  113. neverlib/.history/audio_aug/audio_aug_20250826223054.py +232 -0
  114. neverlib/.history/audio_aug/audio_aug_20250826223225.py +233 -0
  115. neverlib/.history/audio_aug/audio_aug_20250826223344.py +236 -0
  116. neverlib/.history/audio_aug/audio_aug_20250826223356.py +236 -0
  117. neverlib/.history/audio_aug/audio_aug_20250826223955.py +242 -0
  118. neverlib/.history/audio_aug/audio_aug_20250826224210.py +240 -0
  119. neverlib/.history/audio_aug/audio_aug_20250826224250.py +242 -0
  120. neverlib/.history/audio_aug/audio_aug_20250826224323.py +280 -0
  121. neverlib/.history/audio_aug/audio_aug_20250826224452.py +263 -0
  122. neverlib/.history/audio_aug/audio_aug_20250826224455.py +263 -0
  123. neverlib/.history/audio_aug/audio_aug_20250826224502.py +263 -0
  124. neverlib/.history/audio_aug/audio_aug_20250826224528.py +263 -0
  125. neverlib/.history/audio_aug/audio_aug_20250826224658.py +263 -0
  126. neverlib/.history/audio_aug/audio_aug_20250826224833.py +264 -0
  127. neverlib/.history/audio_aug/audio_aug_20250826225013.py +269 -0
  128. neverlib/.history/audio_aug/audio_aug_20250826225050.py +269 -0
  129. neverlib/.history/audio_aug/audio_aug_20250826225241.py +268 -0
  130. neverlib/.history/audio_aug/audio_aug_20250826225315.py +266 -0
  131. neverlib/.history/audio_aug/audio_aug_20250826225404.py +266 -0
  132. neverlib/.history/audio_aug/audio_aug_20250826225502.py +265 -0
  133. neverlib/.history/audio_aug/audio_aug_20250826225950.py +267 -0
  134. neverlib/.history/audio_aug/audio_aug_20250826225959.py +268 -0
  135. neverlib/.history/audio_aug/audio_aug_20250826230222.py +271 -0
  136. neverlib/.history/audio_aug/audio_aug_20250826230248.py +270 -0
  137. neverlib/.history/audio_aug/audio_aug_20250826230638.py +266 -0
  138. neverlib/.history/audio_aug/audio_aug_20250826230755.py +266 -0
  139. neverlib/.history/audio_aug/audio_aug_20250826230941.py +265 -0
  140. neverlib/.history/audio_aug/audio_aug_20250826231054.py +266 -0
  141. neverlib/.history/audio_aug/audio_aug_20250826231117.py +266 -0
  142. neverlib/.history/audio_aug/audio_aug_20250826231219.py +266 -0
  143. neverlib/.history/audio_aug/audio_aug_20250826232330.py +266 -0
  144. neverlib/.history/audio_aug/audio_aug_20250826232352.py +266 -0
  145. neverlib/.history/audio_aug/audio_aug_20250827152748.py +268 -0
  146. neverlib/.history/audio_aug/audio_aug_20250827152806.py +268 -0
  147. neverlib/.history/audio_aug/audio_aug_20250827152808.py +268 -0
  148. neverlib/.history/audio_aug/audio_aug_20250827152917.py +283 -0
  149. neverlib/.history/audio_aug/audio_aug_20250827152929.py +281 -0
  150. neverlib/.history/audio_aug/audio_aug_20250827153100.py +286 -0
  151. neverlib/.history/audio_aug/audio_aug_20250827153102.py +286 -0
  152. neverlib/.history/audio_aug/audio_aug_20250827153301.py +295 -0
  153. neverlib/.history/audio_aug/audio_aug_20250827153331.py +298 -0
  154. neverlib/.history/audio_aug/audio_aug_20250827153525.py +303 -0
  155. neverlib/.history/audio_aug/audio_aug_20250827153533.py +304 -0
  156. neverlib/.history/audio_aug/audio_aug_20250827153541.py +321 -0
  157. neverlib/.history/audio_aug/audio_aug_20250827153805.py +322 -0
  158. neverlib/.history/audio_aug/audio_aug_20250827153832.py +323 -0
  159. neverlib/.history/audio_aug/audio_aug_20250827153836.py +324 -0
  160. neverlib/.history/audio_aug/audio_aug_20250827153846.py +324 -0
  161. neverlib/.history/audio_aug/audio_aug_20250827153859.py +325 -0
  162. neverlib/.history/audio_aug/audio_aug_20250827154453.py +337 -0
  163. neverlib/.history/audio_aug/audio_aug_20250827154513.py +355 -0
  164. neverlib/.history/audio_aug/audio_aug_20250827154538.py +356 -0
  165. neverlib/.history/audio_aug/audio_aug_20250827154541.py +357 -0
  166. neverlib/.history/audio_aug/audio_aug_20250827154612.py +357 -0
  167. neverlib/.history/audio_aug/audio_aug_20250827154657.py +360 -0
  168. neverlib/.history/audio_aug/audio_aug_20250827154708.py +360 -0
  169. neverlib/.history/audio_aug/audio_aug_20250827154728.py +366 -0
  170. neverlib/.history/audio_aug/audio_aug_20250827154755.py +367 -0
  171. neverlib/.history/audio_aug/audio_aug_20250827154800.py +367 -0
  172. neverlib/.history/audio_aug/audio_aug_20250827154917.py +368 -0
  173. neverlib/.history/audio_aug/audio_aug_20250827154928.py +369 -0
  174. neverlib/.history/audio_aug/audio_aug_20250827154932.py +370 -0
  175. neverlib/.history/audio_aug/audio_aug_20250827154947.py +372 -0
  176. neverlib/.history/audio_aug/audio_aug_20250827155015.py +375 -0
  177. neverlib/.history/audio_aug/audio_aug_20250827155106.py +375 -0
  178. neverlib/.history/audio_aug/audio_aug_20250827155114.py +393 -0
  179. neverlib/.history/audio_aug/audio_aug_20250827155207.py +415 -0
  180. neverlib/.history/audio_aug/audio_aug_20250827155300.py +415 -0
  181. neverlib/.history/audio_aug/audio_aug_20250827155321.py +471 -0
  182. neverlib/.history/audio_aug/audio_aug_20250827164703.py +471 -0
  183. neverlib/.history/audio_aug/audio_aug_20250827164749.py +471 -0
  184. neverlib/.history/audio_aug/audio_aug_20250827165252.py +472 -0
  185. neverlib/.history/audio_aug/audio_aug_20250827165334.py +472 -0
  186. neverlib/.history/audio_aug/audio_aug_20250827165404.py +473 -0
  187. neverlib/.history/audio_aug/audio_aug_20250827165610.py +473 -0
  188. neverlib/.history/audio_aug/audio_aug_20250827165805.py +473 -0
  189. neverlib/.history/audio_aug/audio_aug_20250827170056.py +473 -0
  190. neverlib/.history/audio_aug/audio_aug_20250827170106.py +472 -0
  191. neverlib/.history/audio_aug/audio_aug_20250827170143.py +472 -0
  192. neverlib/.history/audio_aug/audio_aug_20250827170216.py +472 -0
  193. neverlib/.history/audio_aug/audio_aug_20250827170218.py +472 -0
  194. neverlib/.history/audio_aug/audio_aug_20250827170314.py +472 -0
  195. neverlib/.history/audio_aug/audio_aug_20250827171500.py +471 -0
  196. neverlib/.history/audio_aug/audio_aug_20250827172347.py +471 -0
  197. neverlib/.history/audio_aug/audio_aug_20250827172558.py +470 -0
  198. neverlib/.history/audio_aug/audio_aug_20250827172559.py +470 -0
  199. neverlib/.history/audio_aug/audio_aug_20250827172801.py +470 -0
  200. neverlib/.history/audio_aug/audio_aug_20250827182522.py +470 -0
  201. neverlib/.history/audio_aug/audio_aug_20250827182526.py +470 -0
  202. neverlib/.history/audio_aug/audio_aug_20250827182626.py +470 -0
  203. neverlib/.history/audio_aug/audio_aug_20250827182715.py +470 -0
  204. neverlib/.history/audio_aug/audio_aug_20250904185444.py +470 -0
  205. neverlib/.history/audio_aug/audio_aug_20250904185538.py +445 -0
  206. neverlib/.history/data_analyze/__init___20250806204158.py +14 -0
  207. neverlib/.history/data_analyze/__init___20250827163248.py +14 -0
  208. neverlib/.history/filter/auto_eq/freq_eq_20250821143140.py +76 -0
  209. neverlib/.history/filter/auto_eq/freq_eq_20250821153208.py +76 -0
  210. neverlib/.history/filter/auto_eq/freq_eq_20250821153214.py +76 -0
  211. neverlib/.history/filter/auto_eq/ga_eq_basic_20250901110521.py +385 -0
  212. neverlib/.history/filter/auto_eq/ga_eq_basic_20250901110652.py +385 -0
  213. neverlib/.history/filter/common_20250806002134.py +37 -0
  214. neverlib/.history/filter/common_20250821120448.py +49 -0
  215. neverlib/.history/filter/common_20250821120453.py +49 -0
  216. neverlib/.history/metrics/snr_20250827224201.py +182 -0
  217. neverlib/.history/metrics/snr_20250827234019.py +186 -0
  218. neverlib/.history/metrics/snr_20250827234028.py +186 -0
  219. neverlib/.history/metrics/snr_20250827234030.py +186 -0
  220. neverlib/.history/utils/audio_split_20250805234209.py +268 -0
  221. neverlib/.history/utils/audio_split_20250904185309.py +268 -0
  222. neverlib/.history/utils/utils_20250813165516.py +330 -0
  223. neverlib/.history/utils/utils_20250904181341.py +328 -0
  224. neverlib/.history/utils/utils_20250904185546.py +352 -0
  225. neverlib/.history/utils/utils_20250904185548.py +353 -0
  226. neverlib/.history/utils/utils_20250904185603.py +353 -0
  227. neverlib/.history/utils/utils_20250904185636.py +353 -0
  228. neverlib/.history/utils/utils_20250904185658.py +358 -0
  229. neverlib/.history/utils/utils_20250904190053.py +359 -0
  230. neverlib/.specstory/history/2025-08-22_02-10Z-/345/256/214/345/226/204/345/207/275/346/225/260/347/232/204/345/212/237/350/203/275/345/222/214/345/217/230/351/207/217/345/220/215/345/273/272/350/256/256.md +247 -0
  231. neverlib/.specstory/history/2025-08-26_11-54Z-oserror-missing-shared-object-file.md +87 -0
  232. neverlib/.specstory/history/2025-08-27_08-07Z-/345/256/214/345/226/204/346/265/213/350/257/225/346/226/207/346/241/243/347/232/204/350/256/250/350/256/272.md +296 -0
  233. neverlib/.specstory/history/2025-08-27_08-29Z-delete-python-file-command.md +211 -0
  234. neverlib/.specstory/history/2025-08-27_09-05Z-/345/234/250jupyter/344/270/255/346/222/255/346/224/276/351/237/263/351/242/221/347/232/204/344/273/243/347/240/201/344/277/256/346/224/271.md +357 -0
  235. neverlib/Docs/audio_aug_test/test_snr.py +55 -0
  236. neverlib/Docs/audio_aug_test/test_volume.py +0 -0
  237. neverlib/QA/html2markdown.py +27 -0
  238. neverlib/__init__.py +10 -20
  239. neverlib/audio_aug/__init__.py +6 -1
  240. neverlib/audio_aug/audio_aug.py +360 -55
  241. neverlib/data_analyze/__init__.py +8 -2
  242. neverlib/data_analyze/temporal_features.py +1 -1
  243. neverlib/filter/__init__.py +9 -3
  244. neverlib/filter/auto_eq/freq_eq.py +1 -1
  245. neverlib/filter/auto_eq/ga_eq_basic.py +3 -3
  246. neverlib/filter/common.py +12 -0
  247. neverlib/metrics/snr.py +5 -3
  248. neverlib/utils/__init__.py +14 -7
  249. neverlib/utils/lazy_module.py +81 -0
  250. neverlib/utils/message.py +3 -8
  251. neverlib/utils/utils.py +32 -3
  252. neverlib/vad/__init__.py +16 -9
  253. neverlib/vad/utils.py +20 -6
  254. {neverlib-0.2.4.dist-info → neverlib-0.2.6.dist-info}/METADATA +21 -17
  255. neverlib-0.2.6.dist-info/RECORD +467 -0
  256. neverlib-0.2.4.dist-info/RECORD +0 -229
  257. /neverlib/{Docs/audio_aug/test_snr.py → .history/Docs/audio_aug/test_snr_20250827162033.py} +0 -0
  258. {neverlib-0.2.4.dist-info → neverlib-0.2.6.dist-info}/WHEEL +0 -0
  259. {neverlib-0.2.4.dist-info → neverlib-0.2.6.dist-info}/licenses/LICENSE +0 -0
  260. {neverlib-0.2.4.dist-info → neverlib-0.2.6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,415 @@
1
+ # -*- coding:utf-8 -*-
2
+ # Author:凌逆战 | Never
3
+ # Date: 2024/9/27
4
+ """
5
+
6
+ """
7
+ import random
8
+ import numpy as np
9
+ import soundfile as sf
10
+ from scipy import signal
11
+ from neverlib.utils import EPS
12
+ from neverlib.filter import HPFilter
13
+
14
+
15
def limiter(wav, threshold=0.999):
    """Scale a waveform down so its peak does not exceed ``threshold``.

    This is a simple peak-normalising limiter: when the largest absolute
    sample is above ``threshold`` the whole signal is multiplied by
    ``threshold / peak``, which preserves the waveform shape and the sign of
    every sample. Signals already within the limit are returned unchanged.

    Args:
        wav: numpy array of samples.
        threshold: maximum allowed absolute amplitude (0.999 is roughly
            -0.01 dBFS).

    Returns:
        The input itself when no limiting is needed, otherwise a new scaled
        array. The input array is never modified.
    """
    if np.size(wav) == 0:
        # np.max raises on empty input; there is nothing to limit.
        return wav
    peak = np.max(np.abs(wav))
    # Divide by the scalar peak, not by the signal: dividing by the signal
    # would turn every sample into the constant ``threshold``.
    if peak > threshold and peak > 0:
        wav = wav * (threshold / peak)
    return wav
25
+
26
+
27
def add_reverb(wav, rir, ratio=1, mode="same"):
    """Randomly convolve a waveform with a room impulse response.

    Args:
        wav: dry signal, documented as [T, channel].
        rir: room impulse response, documented as [T, channel].
        ratio: probability (0-1) of applying the reverb.
        mode: convolution mode passed to ``scipy.signal.fftconvolve``
            ("same" for speech enhancement, "full" for keyword spotting).

    Returns:
        The reverberant signal, rescaled to a peak of 1 when the convolution
        pushed it above 1; or ``wav`` untouched when the reverb was skipped.
    """
    if not (random.random() < ratio):
        return wav
    wet = signal.fftconvolve(wav, rir, mode=mode)
    # Convolution can raise the level considerably, so guard against
    # clipping here; further normalisation is best done after this step.
    peak = np.max(np.abs(wet))
    if peak > 1:
        wet *= 1 / peak
    return wet
43
+
44
+
45
def snr_aug_changeNoise(clean, noise, target_snr, hpf=False, sr=16000, order=4, cutoff=100):
    """Mix speech and noise at ``target_snr`` dB by rescaling the noise.

    The amplitude gain ``k`` applied to the noise satisfies
    ``target_snr = 10 * log10(clean_power / (k**2 * noise_power))``.

    Args:
        clean: speech array; must have the same shape as ``noise``.
        noise: noise array.
        target_snr: desired signal-to-noise ratio in dB.
        hpf: when True, powers are measured on copies passed through
            ``HPFilter`` (recommended when the data contains strong
            low-frequency hum, which otherwise biases the SNR). The mixed
            output always uses the unfiltered signals.
        sr, order, cutoff: forwarded to ``HPFilter``.

    Returns:
        ``(noisy, clean)``. When the mixture would clip, both are scaled by
        the same factor so the mixture peaks at 1. The input arrays are not
        modified.
    """
    assert clean.shape == noise.shape, "clean and noise must have the same shape"
    clean_ref, noise_ref = clean, noise
    if hpf:
        # Filter copies so the measurement never touches the caller's data.
        clean_ref = HPFilter(clean.copy(), sr=sr, order=order, cutoff=cutoff)
        noise_ref = HPFilter(noise.copy(), sr=sr, order=order, cutoff=cutoff)
    clean_power, noise_power = np.mean(clean_ref ** 2), np.mean(noise_ref ** 2)
    noise_scale = np.sqrt(clean_power / (noise_power * 10 ** (target_snr / 10) + EPS))
    noisy = clean + noise_scale * noise
    # Prevent clipping; build a new ``clean`` instead of scaling in place so
    # the caller's array is left intact.
    peak = np.max(np.abs(noisy))
    if peak > 1:
        scale_factor = 1 / peak
        noisy = noisy * scale_factor
        clean = clean * scale_factor
    return noisy, clean
66
+
67
+
68
def snr_aug_changeClean(clean, noise, target_snr, clip_check=True, hpf=False, sr=16000, order=4, cutoff=100):
    """Mix speech and noise at ``target_snr`` dB by rescaling the speech.

    The amplitude gain ``k`` applied to the speech satisfies
    ``target_snr = 10 * log10(k**2 * clean_power / noise_power)``.

    Args:
        clean: speech array; must have the same shape as ``noise``.
        noise: noise array (left at its original level).
        target_snr: desired signal-to-noise ratio in dB.
        clip_check: when True, rescale the result if the mixture exceeds 1.
        hpf: when True, powers are measured on copies passed through
            ``HPFilter``; the mixed output uses the unfiltered signals.
        sr, order, cutoff: forwarded to ``HPFilter``.

    Returns:
        ``(noisy, clean_scaled)`` where ``clean_scaled`` is exactly the
        speech component contained in ``noisy`` (gain applied, plus the
        anti-clipping factor when it was used). The inputs are not modified.
    """
    assert clean.shape == noise.shape, "clean and noise must have the same shape"
    clean_ref, noise_ref = clean, noise
    if hpf:
        clean_ref = HPFilter(clean.copy(), sr=sr, order=order, cutoff=cutoff)
        noise_ref = HPFilter(noise.copy(), sr=sr, order=order, cutoff=cutoff)
    clean_power, noise_power = np.mean(clean_ref ** 2), np.mean(noise_ref ** 2)
    # Amplitude gain the speech needs to reach the target SNR.
    clean_scale = np.sqrt(noise_power * 10 ** (target_snr / 10) / (clean_power + EPS))
    # Apply the gain unconditionally so the returned target always matches
    # the speech inside ``noisy``, whether or not clipping protection fires.
    clean_scaled = clean * clean_scale
    noisy = clean_scaled + noise
    if clip_check:
        peak = np.max(np.abs(noisy))
        if peak > 1:
            scale_factor = 1 / peak
            noisy = noisy * scale_factor
            clean_scaled = clean_scaled * scale_factor
    return noisy, clean_scaled
90
+
91
+
92
def snr_diff_changeNoise(clean, noise, target_snr, hpf=False, sr=16000, order=4, cutoff=100):
    """Mix speech and noise at ``target_snr`` dB by rescaling the noise.

    Equivalent in effect to ``snr_aug_changeNoise`` but computed in the dB
    domain: the current SNR is measured and the noise is boosted or
    attenuated by the difference to the target.

    Args:
        clean: speech array; must have the same shape as ``noise``.
        noise: noise array.
        target_snr: desired signal-to-noise ratio in dB.
        hpf: when True, powers are measured on copies passed through
            ``HPFilter``; the mixed output uses the unfiltered signals.
        sr, order, cutoff: forwarded to ``HPFilter``.

    Returns:
        ``(noisy, clean)``; both are scaled by the same factor when the
        mixture would clip. The input arrays are not modified.
    """
    assert clean.shape == noise.shape, "clean and noise must have the same shape"
    clean_ref, noise_ref = clean, noise
    if hpf:
        clean_ref = HPFilter(clean.copy(), sr=sr, order=order, cutoff=cutoff)
        noise_ref = HPFilter(noise.copy(), sr=sr, order=order, cutoff=cutoff)
    clean_power, noise_power = np.mean(clean_ref ** 2), np.mean(noise_ref ** 2)
    source_snr = 10 * np.log10(clean_power / (noise_power + EPS) + EPS)
    noise_dB = source_snr - target_snr  # how many dB the noise must change
    noise_gain = 10 ** (noise_dB / 20)
    noisy = clean + noise_gain * noise
    # Prevent clipping without scaling the caller's ``clean`` in place.
    peak = np.max(np.abs(noisy))
    if peak > 1:
        scale_factor = 1 / peak
        noisy = noisy * scale_factor
        clean = clean * scale_factor
    return noisy, clean
114
+
115
+
116
def snr_diff_changeClean(clean, noise, target_snr, clip_check=True, hpf=False, sr=16000, order=4, cutoff=100):
    """Mix speech and noise at ``target_snr`` dB by rescaling the speech.

    Equivalent in effect to ``snr_aug_changeClean`` but computed in the dB
    domain. With amplitude gain ``k`` on the speech:
    ``target_snr = 10 * log10(k**2 * clean_power / noise_power)``.

    Args:
        clean: speech array; must have the same shape as ``noise``.
        noise: noise array (left at its original level).
        target_snr: desired signal-to-noise ratio in dB.
        clip_check: when True, rescale the result if the mixture exceeds 1.
        hpf: when True, powers are measured on copies passed through
            ``HPFilter``; the mixed output uses the unfiltered signals.
        sr, order, cutoff: forwarded to ``HPFilter``.

    Returns:
        ``(noisy, clean_scaled)`` where ``clean_scaled`` is exactly the
        speech component contained in ``noisy``. The inputs are not modified.
    """
    assert clean.shape == noise.shape, "clean and noise must have the same shape"
    clean_ref, noise_ref = clean, noise
    if hpf:
        clean_ref = HPFilter(clean.copy(), sr=sr, order=order, cutoff=cutoff)
        noise_ref = HPFilter(noise.copy(), sr=sr, order=order, cutoff=cutoff)
    clean_power, noise_power = np.mean(clean_ref ** 2), np.mean(noise_ref ** 2)
    source_snr = 10 * np.log10(clean_power / (noise_power + EPS) + EPS)
    clean_dB = target_snr - source_snr  # how many dB the speech must change
    clean_gain = 10 ** (clean_dB / 20)
    # Apply the gain unconditionally so the returned target always matches
    # the speech inside ``noisy``.
    clean_scaled = clean_gain * clean
    noisy = clean_scaled + noise
    if clip_check:
        peak = np.max(np.abs(noisy))
        if peak > 1:
            scale_factor = 1 / peak
            noisy = noisy * scale_factor
            clean_scaled = clean_scaled * scale_factor
    return noisy, clean_scaled
139
+
140
+
141
def snr_aug_Interpolation(clean, noise, target_snr, hpf=False, sr=16000, order=4, cutoff=100):
    """Insert speech at a random offset of a longer noise clip at a target SNR.

    The noise keeps its level; the speech is rescaled so that, over the
    inserted span, the speech-to-noise power ratio equals ``target_snr``.

    Args:
        clean: speech, 2-D array [T_clean, channel].
        noise: noise, 2-D array [T_noise, channel] with T_clean <= T_noise.
        target_snr: desired SNR in dB, e.g. ``random.uniform(*snr_range)``.
        hpf: when True, powers are measured on copies passed through
            ``HPFilter``; the mixed output always uses the unfiltered signals.
        sr, order, cutoff: forwarded to ``HPFilter``.

    Returns:
        ``(noisy, clean_padded)``: the full-length mixture and the scaled
        speech zero-padded to the noise length at the insertion offset, so
        that ``clean_padded`` is exactly the speech component of ``noisy``.
        The input arrays are not modified.
    """
    clean_len, noise_len = clean.shape[0], noise.shape[0]
    assert clean_len <= noise_len, "clean_len must be less than noise_len."
    index = random.randint(0, noise_len - clean_len)
    noise_seg = noise[index:index + clean_len, :]
    clean_ref, noise_ref = clean, noise_seg
    if hpf:
        # Filtered copies are used for the power measurement only.
        clean_ref = HPFilter(clean.copy(), sr=sr, order=order, cutoff=cutoff)
        noise_ref = HPFilter(noise.copy(), sr=sr, order=order, cutoff=cutoff)[index:index + clean_len, :]
    clean_power, noise_power = np.mean(clean_ref ** 2), np.mean(noise_ref ** 2)
    clean_scale = np.sqrt(noise_power * 10 ** (target_snr / 10) / (clean_power + EPS))
    clean_scaled = clean * clean_scale
    noisy = noise.copy()
    # No clipping check on the segment alone: the noise outside the span must
    # stay consistent with the noise inside it, so scale the whole clip below.
    noisy[index:index + clean_len, :] = clean_scaled + noise_seg
    peak = np.max(np.abs(noisy))
    if peak > 1:
        scale_factor = 1 / peak
        noisy *= scale_factor
        clean_scaled = clean_scaled * scale_factor
    return noisy, np.pad(clean_scaled, ((index, noise_len - index - clean_len), (0, 0)))
168
+
169
+
170
def get_snr_use_vad(wav, vad, sr=16000):
    """Estimate the SNR (dB) of a recording from a single VAD span.

    Samples inside ``vad["start"]:vad["end"]`` are treated as speech plus
    noise, everything outside as noise only. The signal is first passed
    through ``HPFilter`` (order 6, cutoff 100).

    Args:
        wav: recorded signal.
        vad: dict with integer sample indices ``"start"`` and ``"end"``.
        sr: sampling rate forwarded to ``HPFilter``.

    Returns:
        The estimated SNR rounded to one decimal place.
    """
    filtered = HPFilter(wav, sr=sr, order=6, cutoff=100)
    start, end = vad["start"], vad["end"]
    speech_part = filtered[start:end]
    noise_part = np.concatenate([filtered[:start], filtered[end:]], axis=0)

    # Power is taken over the speech span only; using the whole recording
    # would over-weight the noise and push the estimate too low.
    mixed_power = np.mean(speech_part ** 2)
    noise_power = np.mean(noise_part ** 2)

    # Net speech power, floored so the logarithm stays defined.
    speech_power = max(mixed_power - noise_power, 1e-8)
    noise_power = 1e-8 if noise_power < 1e-8 else noise_power

    return round(10 * np.log10(speech_power / noise_power), 1)
191
+
192
+
193
def snr_aug_vad_Interpolation(clean, noise, target_snr, vad, hpf=False, sr=16000, order=4, cutoff=100):
    """Insert speech into a longer noise clip, setting the SNR over the VAD span.

    Like ``snr_aug_Interpolation`` but the powers that define the SNR are
    measured only over ``vad["start"]:vad["end"]`` (indices relative to
    ``clean``), so leading/trailing silence does not dilute the speech power.

    Args:
        clean: speech, 2-D array [T_clean, channel].
        noise: noise, 2-D array [T_noise, channel] with T_clean <= T_noise.
        target_snr: desired SNR in dB.
        vad: ``{"start": int, "end": int}`` speech span inside ``clean``.
        hpf: when True, powers are measured on copies passed through
            ``HPFilter``; the mixed output uses the unfiltered signals.
        sr, order, cutoff: forwarded to ``HPFilter``.

    Returns:
        ``(noisy, clean_padded)``: the full-length mixture and the scaled
        speech zero-padded to the noise length at the insertion offset.
        The input arrays are not modified.
    """
    clean_len, noise_len = clean.shape[0], noise.shape[0]
    assert clean_len <= noise_len, "clean_len must be less than noise_len."
    index = random.randint(0, noise_len - clean_len)
    # Noise segment the speech is mixed with; same length as ``clean``.
    noise_seg = noise[index:index + clean_len, :]
    clean_ref, noise_ref = clean, noise_seg
    if hpf:
        clean_ref = HPFilter(clean.copy(), sr=sr, order=order, cutoff=cutoff)
        noise_ref = HPFilter(noise.copy(), sr=sr, order=order, cutoff=cutoff)[index:index + clean_len, :]
    # Measure both powers over the speech span. The noise reference is the
    # aligned segment, so the VAD indices address the noise actually mixed in.
    clean_vad = clean_ref[vad["start"]:vad["end"]]
    noise_vad = noise_ref[vad["start"]:vad["end"]]
    power_clean, power_noise = np.mean(clean_vad ** 2), np.mean(noise_vad ** 2)
    snr_in = 10 * np.log10(power_clean / (power_noise + EPS) + EPS)
    clean_dB = target_snr - snr_in  # how many dB the speech must change
    clean_gain = 10 ** (clean_dB / 20)
    clean_scaled = clean_gain * clean

    noisy = noise.copy()
    noisy[index:index + clean_len, :] = clean_scaled + noise_seg
    # Prevent clipping on the whole clip so inside/outside noise stays consistent.
    peak = np.max(np.abs(noisy))
    if peak > 1:
        scale_factor = 1 / peak
        noisy *= scale_factor
        clean_scaled = clean_scaled * scale_factor
    return noisy, np.pad(clean_scaled, ((index, noise_len - index - clean_len), (0, 0)))
228
+
229
+
230
def get_audio_segments(wav_len, audio_path_list, sr=16000):
    """Build a clip of the requested length from randomly chosen audio files.

    Files are drawn at random (with replacement) and concatenated until at
    least ``wav_len`` samples are available; a random window of exactly
    ``wav_len`` samples is then cut out when the concatenation is longer.

    Args:
        wav_len: number of samples to return.
        audio_path_list: list of audio file paths to draw from.
        sr: expected sampling rate; every file read must match it.

    Returns:
        A float32 array of shape [wav_len, channel].
    """
    chunks = []
    total = 0
    while total < wav_len:
        path = random.choice(audio_path_list)
        data, file_sr = sf.read(path, always_2d=True, dtype='float32')
        assert file_sr == sr, f"音频采样率是{file_sr}, 期望{sr}"
        total += len(data)
        chunks.append(data)
    joined = np.concatenate(chunks, axis=0)
    if len(joined) <= wav_len:
        return joined
    # Too long: keep a random window of the requested length.
    offset = random.randint(0, len(joined) - wav_len)
    return joined[offset:offset + wav_len, :]
253
+
254
+
255
+ # ----------------------------------------------------------------
256
+ # 音量增强
257
+ # ----------------------------------------------------------------
258
def volume_norm(wav):
    """Peak-normalise a waveform.

    Args:
        wav: array of samples, documented as shape (T,).

    Returns:
        A new array of the same shape divided by its absolute peak (plus a
        small constant so an all-zero input does not divide by zero).
    """
    peak = np.max(np.abs(wav))
    return wav / (peak + 1e-8)
266
+
267
+
268
def volume_aug(wav, range, rate, method="linmax"):
    """Randomly rescale a waveform to a target level drawn from ``range``.

    Args:
        wav: array of samples.
        range: ``(low, high)`` bounds for the target level; in dB for
            ``"dbrms"``, linear peak amplitude for ``"linmax"``.
        rate: probability of applying the augmentation.
        method: ``"dbrms"`` (match the RMS level) or ``"linmax"`` (match the
            absolute peak).

    Returns:
        A new rescaled array when the augmentation is applied, otherwise
        ``wav`` itself. The input array is never modified.

    Raises:
        ValueError: if ``method`` is not one of the supported names (only
            checked when the augmentation is actually applied).
    """
    if random.random() < rate:
        target_level = random.uniform(range[0], range[1])
        if method == "dbrms":
            wav_rms = (wav ** 2).mean() ** 0.5
            scalar = 10 ** (target_level / 20) / (wav_rms + EPS)
        elif method == "linmax":
            ipt_max = np.max(np.abs(wav))
            scalar = target_level / (ipt_max + EPS)
        else:
            raise ValueError("method must be 'dbrms' or 'linmax'")
        # Build a new array rather than scaling the caller's data in place.
        wav = wav * scalar
    return wav
282
+
283
+
284
def volume_aug_dbrms(wav, target_level, hpf=False, sr=16000, order=4, cutoff=100):
    """Rescale a waveform so its RMS level equals ``target_level`` dB.

    Args:
        wav: array of samples.
        target_level: target RMS level in dB.
        hpf: when True, the RMS is measured on a copy passed through
            ``HPFilter``; the gain is still applied to the unfiltered signal.
        sr: sampling rate forwarded to ``HPFilter``.
        order: filter order forwarded to ``HPFilter``.
        cutoff: cutoff frequency forwarded to ``HPFilter``.

    Returns:
        A new array: the rescaled signal after ``limiter``. The input array
        is not modified.
    """
    measured = HPFilter(wav.copy(), sr=sr, order=order, cutoff=cutoff) if hpf else wav
    wav_rms = (measured ** 2).mean() ** 0.5
    scalar = 10 ** (target_level / 20) / (wav_rms + EPS)
    # Out-of-place multiply so the caller's array is not left half-processed
    # (scaled but not limited).
    return limiter(wav * scalar)
303
+
304
+
305
def volume_aug_linmax(wav, target_level, hpf=False, sr=16000, order=4, cutoff=100):
    """Rescale a waveform so its absolute peak equals ``target_level``.

    Args:
        wav: array of samples.
        target_level: target peak as a linear amplitude, strictly between
            0 and 1 (not a dB value).
        hpf: when True, the peak is measured on a copy passed through
            ``HPFilter``; the gain is still applied to the unfiltered signal.
        sr: sampling rate forwarded to ``HPFilter``.
        order: filter order forwarded to ``HPFilter``.
        cutoff: cutoff frequency forwarded to ``HPFilter``.

    Returns:
        A new array: the rescaled signal after ``limiter``. The input array
        is not modified.
    """
    assert target_level > 0 and target_level < 1, "target_level must be between 0 and 1"
    measured = HPFilter(wav.copy(), sr=sr, order=order, cutoff=cutoff) if hpf else wav
    wav_max = np.max(np.abs(measured))
    scalar = target_level / (wav_max + EPS)
    # Out-of-place multiply so the caller's array is not left half-processed
    # (scaled but not limited).
    return limiter(wav * scalar)
326
+
327
+
328
+ import pyloudnorm as pyln
329
+
330
def volume_aug_lufs(wav, target_lufs, hpf=False, sr=16000, order=4, cutoff=100):
    """Rescale a waveform to a target integrated loudness in LUFS.

    LUFS is a perceptual loudness measure (ITU-R BS.1770). It applies
    K-weighting (a boost around 3-6 kHz where hearing is most sensitive and
    an attenuation of content below about 100 Hz), measures energy in 400 ms
    blocks, and gates out very quiet blocks (-70 LUFS).

    Args:
        wav: array of samples.
        target_lufs: target loudness in LUFS.
        hpf: when True, loudness is measured on a copy passed through
            ``HPFilter``; the gain is still applied to the unfiltered signal.
        sr: sampling rate.
        order: filter order forwarded to ``HPFilter``.
        cutoff: cutoff frequency forwarded to ``HPFilter``.

    Returns:
        A new array: the rescaled signal after ``limiter``. A signal whose
        loudness cannot be measured (e.g. digital silence) is returned as an
        unscaled copy.

    Typical targets (industry reference):
        YouTube / Spotify   -14
        Apple Music         -16
        Broadcast / TV      -23
        Game audio          -16 to -18
        Audiobooks          -18 to -20
    """
    wav_tmp = wav.copy()
    if hpf:
        # Honour the caller's filter settings instead of fixed values.
        wav_tmp = HPFilter(wav_tmp, sr=sr, order=order, cutoff=cutoff)

    # Loudness meter per ITU-R BS.1770 with 400 ms blocks.
    meter = pyln.Meter(sr, block_size=0.400)
    loudness = meter.integrated_loudness(wav_tmp)

    if not np.isfinite(loudness):
        # Silence measures as -inf; the gain would be infinite and turn the
        # output into NaN/inf, so leave the signal as it is.
        return wav.copy()

    # Convert the loudness gap in dB into a linear amplitude gain.
    scalar = 10 ** ((target_lufs - loudness) / 20.0)
    return limiter(wav * scalar, threshold=0.999)
374
+
375
def measure_loudness(wav, sr):
    """Measure peak, RMS, LUFS and crest factor of a waveform.

    Args:
        wav: np.ndarray waveform, expected in the range [-1, 1].
        sr: sampling rate (int).

    Returns:
        dict with:
            - peak_dbfs: absolute peak in dBFS
            - rms_dbfs: root-mean-square level in dBFS
            - lufs: integrated loudness in LUFS (ITU-R BS.1770, 400 ms blocks)
            - crest_factor_db: peak_dbfs minus rms_dbfs, in dB
    """
    tiny = 1e-9  # keeps log10 finite for an all-zero signal

    peak_db = 20 * np.log10(np.max(np.abs(wav)) + tiny)
    rms_db = 20 * np.log10(np.sqrt(np.mean(wav ** 2)) + tiny)
    lufs = pyln.Meter(sr, block_size=0.400).integrated_loudness(wav)

    result = {}
    result["peak_dbfs"] = peak_db
    result["rms_dbfs"] = rms_db
    result["lufs"] = lufs
    result["crest_factor_db"] = peak_db - rms_db
    return result
413
+
414
+
415
+ def volume_scale_Conversion