neverlib 0.2.4__py3-none-any.whl → 0.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (260) hide show
  1. neverlib/.history/Docs/audio_aug/del_20250827162530.py +0 -0
  2. neverlib/.history/Docs/audio_aug/del_20250827162540.py +2 -0
  3. neverlib/.history/Docs/audio_aug/del_20250827162541.py +7 -0
  4. neverlib/.history/Docs/audio_aug/del_20250827162606.py +7 -0
  5. neverlib/.history/Docs/audio_aug/del_20250827162637.py +8 -0
  6. neverlib/.history/Docs/audio_aug/del_20250827162645.py +8 -0
  7. neverlib/.history/Docs/audio_aug/del_20250827162723.py +9 -0
  8. neverlib/.history/Docs/audio_aug/del_20250827162739.py +9 -0
  9. neverlib/.history/Docs/audio_aug/test_snr_20250827161751.py +55 -0
  10. neverlib/.history/Docs/audio_aug/test_snr_20250827161754.py +55 -0
  11. neverlib/.history/Docs/audio_aug/test_snr_20250827161833.py +54 -0
  12. neverlib/.history/Docs/audio_aug/test_snr_20250827162017.py +56 -0
  13. neverlib/.history/Docs/audio_aug/test_snr_20250827162021.py +57 -0
  14. neverlib/.history/Docs/audio_aug/test_snr_20250827162028.py +57 -0
  15. neverlib/.history/Docs/audio_aug_test/del_20250827162738.py +9 -0
  16. neverlib/.history/Docs/audio_aug_test/del_20250827162819.py +9 -0
  17. neverlib/.history/Docs/audio_aug_test/del_20250827162830.py +9 -0
  18. neverlib/.history/Docs/audio_aug_test/del_20250827162846.py +9 -0
  19. neverlib/.history/Docs/audio_aug_test/del_20250827162851.py +9 -0
  20. neverlib/.history/Docs/audio_aug_test/del_20250827162903.py +10 -0
  21. neverlib/.history/Docs/audio_aug_test/del_20250827162921.py +10 -0
  22. neverlib/.history/Docs/audio_aug_test/del_20250827162926.py +10 -0
  23. neverlib/.history/Docs/audio_aug_test/del_20250827163030.py +10 -0
  24. neverlib/.history/Docs/audio_aug_test/del_20250827163032.py +10 -0
  25. neverlib/.history/QA/html2markdown_20250822234112.md +0 -0
  26. neverlib/.history/QA/html2markdown_20250822234140.py +9 -0
  27. neverlib/.history/QA/html2markdown_20250822234141.md +9 -0
  28. neverlib/.history/QA/html2markdown_20250822234159.py +12 -0
  29. neverlib/.history/QA/html2markdown_20250822234200.py +17 -0
  30. neverlib/.history/QA/html2markdown_20250822234236.py +17 -0
  31. neverlib/.history/QA/html2markdown_20250822234340.py +14 -0
  32. neverlib/.history/QA/html2markdown_20250822234522.py +18 -0
  33. neverlib/.history/QA/html2markdown_20250822234601.py +20 -0
  34. neverlib/.history/QA/html2markdown_20250822234615.py +22 -0
  35. neverlib/.history/QA/html2markdown_20250822234715.py +28 -0
  36. neverlib/.history/QA/html2markdown_20250822234720.py +27 -0
  37. neverlib/.history/QA/html2markdown_20250822234903.py +27 -0
  38. neverlib/.history/__init___20250805234212.py +41 -0
  39. neverlib/.history/__init___20250904102635.py +39 -0
  40. neverlib/.history/__init___20250904102836.py +34 -0
  41. neverlib/.history/__init___20250904102838.py +39 -0
  42. neverlib/.history/__init___20250904102851.py +33 -0
  43. neverlib/.history/audio_aug/audio_aug_20250826155913.py +158 -0
  44. neverlib/.history/audio_aug/audio_aug_20250826164159.py +159 -0
  45. neverlib/.history/audio_aug/audio_aug_20250826164217.py +160 -0
  46. neverlib/.history/audio_aug/audio_aug_20250826164408.py +161 -0
  47. neverlib/.history/audio_aug/audio_aug_20250826164423.py +161 -0
  48. neverlib/.history/audio_aug/audio_aug_20250826164529.py +161 -0
  49. neverlib/.history/audio_aug/audio_aug_20250826164824.py +161 -0
  50. neverlib/.history/audio_aug/audio_aug_20250826164932.py +162 -0
  51. neverlib/.history/audio_aug/audio_aug_20250826164947.py +162 -0
  52. neverlib/.history/audio_aug/audio_aug_20250826165403.py +162 -0
  53. neverlib/.history/audio_aug/audio_aug_20250826165421.py +162 -0
  54. neverlib/.history/audio_aug/audio_aug_20250826165509.py +163 -0
  55. neverlib/.history/audio_aug/audio_aug_20250826165702.py +163 -0
  56. neverlib/.history/audio_aug/audio_aug_20250826165732.py +165 -0
  57. neverlib/.history/audio_aug/audio_aug_20250826170041.py +163 -0
  58. neverlib/.history/audio_aug/audio_aug_20250826170105.py +164 -0
  59. neverlib/.history/audio_aug/audio_aug_20250826170154.py +164 -0
  60. neverlib/.history/audio_aug/audio_aug_20250826170220.py +165 -0
  61. neverlib/.history/audio_aug/audio_aug_20250826170221.py +165 -0
  62. neverlib/.history/audio_aug/audio_aug_20250826170228.py +165 -0
  63. neverlib/.history/audio_aug/audio_aug_20250826170231.py +165 -0
  64. neverlib/.history/audio_aug/audio_aug_20250826212001.py +165 -0
  65. neverlib/.history/audio_aug/audio_aug_20250826220038.py +165 -0
  66. neverlib/.history/audio_aug/audio_aug_20250826220133.py +165 -0
  67. neverlib/.history/audio_aug/audio_aug_20250826220148.py +165 -0
  68. neverlib/.history/audio_aug/audio_aug_20250826220154.py +165 -0
  69. neverlib/.history/audio_aug/audio_aug_20250826220156.py +165 -0
  70. neverlib/.history/audio_aug/audio_aug_20250826220314.py +165 -0
  71. neverlib/.history/audio_aug/audio_aug_20250826220343.py +184 -0
  72. neverlib/.history/audio_aug/audio_aug_20250826220345.py +184 -0
  73. neverlib/.history/audio_aug/audio_aug_20250826220349.py +184 -0
  74. neverlib/.history/audio_aug/audio_aug_20250826220429.py +184 -0
  75. neverlib/.history/audio_aug/audio_aug_20250826220447.py +184 -0
  76. neverlib/.history/audio_aug/audio_aug_20250826220601.py +186 -0
  77. neverlib/.history/audio_aug/audio_aug_20250826220638.py +186 -0
  78. neverlib/.history/audio_aug/audio_aug_20250826220641.py +186 -0
  79. neverlib/.history/audio_aug/audio_aug_20250826220647.py +186 -0
  80. neverlib/.history/audio_aug/audio_aug_20250826220653.py +186 -0
  81. neverlib/.history/audio_aug/audio_aug_20250826220655.py +186 -0
  82. neverlib/.history/audio_aug/audio_aug_20250826220731.py +185 -0
  83. neverlib/.history/audio_aug/audio_aug_20250826220739.py +185 -0
  84. neverlib/.history/audio_aug/audio_aug_20250826220747.py +185 -0
  85. neverlib/.history/audio_aug/audio_aug_20250826220801.py +186 -0
  86. neverlib/.history/audio_aug/audio_aug_20250826220822.py +186 -0
  87. neverlib/.history/audio_aug/audio_aug_20250826220901.py +186 -0
  88. neverlib/.history/audio_aug/audio_aug_20250826221107.py +187 -0
  89. neverlib/.history/audio_aug/audio_aug_20250826221310.py +188 -0
  90. neverlib/.history/audio_aug/audio_aug_20250826221353.py +191 -0
  91. neverlib/.history/audio_aug/audio_aug_20250826221821.py +191 -0
  92. neverlib/.history/audio_aug/audio_aug_20250826221838.py +191 -0
  93. neverlib/.history/audio_aug/audio_aug_20250826221906.py +191 -0
  94. neverlib/.history/audio_aug/audio_aug_20250826221930.py +191 -0
  95. neverlib/.history/audio_aug/audio_aug_20250826221939.py +191 -0
  96. neverlib/.history/audio_aug/audio_aug_20250826221955.py +191 -0
  97. neverlib/.history/audio_aug/audio_aug_20250826222008.py +197 -0
  98. neverlib/.history/audio_aug/audio_aug_20250826222017.py +200 -0
  99. neverlib/.history/audio_aug/audio_aug_20250826222046.py +203 -0
  100. neverlib/.history/audio_aug/audio_aug_20250826222105.py +203 -0
  101. neverlib/.history/audio_aug/audio_aug_20250826222206.py +203 -0
  102. neverlib/.history/audio_aug/audio_aug_20250826222302.py +203 -0
  103. neverlib/.history/audio_aug/audio_aug_20250826222336.py +203 -0
  104. neverlib/.history/audio_aug/audio_aug_20250826222455.py +204 -0
  105. neverlib/.history/audio_aug/audio_aug_20250826222526.py +204 -0
  106. neverlib/.history/audio_aug/audio_aug_20250826222541.py +204 -0
  107. neverlib/.history/audio_aug/audio_aug_20250826222624.py +202 -0
  108. neverlib/.history/audio_aug/audio_aug_20250826222714.py +205 -0
  109. neverlib/.history/audio_aug/audio_aug_20250826222820.py +205 -0
  110. neverlib/.history/audio_aug/audio_aug_20250826222827.py +205 -0
  111. neverlib/.history/audio_aug/audio_aug_20250826222927.py +232 -0
  112. neverlib/.history/audio_aug/audio_aug_20250826223009.py +232 -0
  113. neverlib/.history/audio_aug/audio_aug_20250826223054.py +232 -0
  114. neverlib/.history/audio_aug/audio_aug_20250826223225.py +233 -0
  115. neverlib/.history/audio_aug/audio_aug_20250826223344.py +236 -0
  116. neverlib/.history/audio_aug/audio_aug_20250826223356.py +236 -0
  117. neverlib/.history/audio_aug/audio_aug_20250826223955.py +242 -0
  118. neverlib/.history/audio_aug/audio_aug_20250826224210.py +240 -0
  119. neverlib/.history/audio_aug/audio_aug_20250826224250.py +242 -0
  120. neverlib/.history/audio_aug/audio_aug_20250826224323.py +280 -0
  121. neverlib/.history/audio_aug/audio_aug_20250826224452.py +263 -0
  122. neverlib/.history/audio_aug/audio_aug_20250826224455.py +263 -0
  123. neverlib/.history/audio_aug/audio_aug_20250826224502.py +263 -0
  124. neverlib/.history/audio_aug/audio_aug_20250826224528.py +263 -0
  125. neverlib/.history/audio_aug/audio_aug_20250826224658.py +263 -0
  126. neverlib/.history/audio_aug/audio_aug_20250826224833.py +264 -0
  127. neverlib/.history/audio_aug/audio_aug_20250826225013.py +269 -0
  128. neverlib/.history/audio_aug/audio_aug_20250826225050.py +269 -0
  129. neverlib/.history/audio_aug/audio_aug_20250826225241.py +268 -0
  130. neverlib/.history/audio_aug/audio_aug_20250826225315.py +266 -0
  131. neverlib/.history/audio_aug/audio_aug_20250826225404.py +266 -0
  132. neverlib/.history/audio_aug/audio_aug_20250826225502.py +265 -0
  133. neverlib/.history/audio_aug/audio_aug_20250826225950.py +267 -0
  134. neverlib/.history/audio_aug/audio_aug_20250826225959.py +268 -0
  135. neverlib/.history/audio_aug/audio_aug_20250826230222.py +271 -0
  136. neverlib/.history/audio_aug/audio_aug_20250826230248.py +270 -0
  137. neverlib/.history/audio_aug/audio_aug_20250826230638.py +266 -0
  138. neverlib/.history/audio_aug/audio_aug_20250826230755.py +266 -0
  139. neverlib/.history/audio_aug/audio_aug_20250826230941.py +265 -0
  140. neverlib/.history/audio_aug/audio_aug_20250826231054.py +266 -0
  141. neverlib/.history/audio_aug/audio_aug_20250826231117.py +266 -0
  142. neverlib/.history/audio_aug/audio_aug_20250826231219.py +266 -0
  143. neverlib/.history/audio_aug/audio_aug_20250826232330.py +266 -0
  144. neverlib/.history/audio_aug/audio_aug_20250826232352.py +266 -0
  145. neverlib/.history/audio_aug/audio_aug_20250827152748.py +268 -0
  146. neverlib/.history/audio_aug/audio_aug_20250827152806.py +268 -0
  147. neverlib/.history/audio_aug/audio_aug_20250827152808.py +268 -0
  148. neverlib/.history/audio_aug/audio_aug_20250827152917.py +283 -0
  149. neverlib/.history/audio_aug/audio_aug_20250827152929.py +281 -0
  150. neverlib/.history/audio_aug/audio_aug_20250827153100.py +286 -0
  151. neverlib/.history/audio_aug/audio_aug_20250827153102.py +286 -0
  152. neverlib/.history/audio_aug/audio_aug_20250827153301.py +295 -0
  153. neverlib/.history/audio_aug/audio_aug_20250827153331.py +298 -0
  154. neverlib/.history/audio_aug/audio_aug_20250827153525.py +303 -0
  155. neverlib/.history/audio_aug/audio_aug_20250827153533.py +304 -0
  156. neverlib/.history/audio_aug/audio_aug_20250827153541.py +321 -0
  157. neverlib/.history/audio_aug/audio_aug_20250827153805.py +322 -0
  158. neverlib/.history/audio_aug/audio_aug_20250827153832.py +323 -0
  159. neverlib/.history/audio_aug/audio_aug_20250827153836.py +324 -0
  160. neverlib/.history/audio_aug/audio_aug_20250827153846.py +324 -0
  161. neverlib/.history/audio_aug/audio_aug_20250827153859.py +325 -0
  162. neverlib/.history/audio_aug/audio_aug_20250827154453.py +337 -0
  163. neverlib/.history/audio_aug/audio_aug_20250827154513.py +355 -0
  164. neverlib/.history/audio_aug/audio_aug_20250827154538.py +356 -0
  165. neverlib/.history/audio_aug/audio_aug_20250827154541.py +357 -0
  166. neverlib/.history/audio_aug/audio_aug_20250827154612.py +357 -0
  167. neverlib/.history/audio_aug/audio_aug_20250827154657.py +360 -0
  168. neverlib/.history/audio_aug/audio_aug_20250827154708.py +360 -0
  169. neverlib/.history/audio_aug/audio_aug_20250827154728.py +366 -0
  170. neverlib/.history/audio_aug/audio_aug_20250827154755.py +367 -0
  171. neverlib/.history/audio_aug/audio_aug_20250827154800.py +367 -0
  172. neverlib/.history/audio_aug/audio_aug_20250827154917.py +368 -0
  173. neverlib/.history/audio_aug/audio_aug_20250827154928.py +369 -0
  174. neverlib/.history/audio_aug/audio_aug_20250827154932.py +370 -0
  175. neverlib/.history/audio_aug/audio_aug_20250827154947.py +372 -0
  176. neverlib/.history/audio_aug/audio_aug_20250827155015.py +375 -0
  177. neverlib/.history/audio_aug/audio_aug_20250827155106.py +375 -0
  178. neverlib/.history/audio_aug/audio_aug_20250827155114.py +393 -0
  179. neverlib/.history/audio_aug/audio_aug_20250827155207.py +415 -0
  180. neverlib/.history/audio_aug/audio_aug_20250827155300.py +415 -0
  181. neverlib/.history/audio_aug/audio_aug_20250827155321.py +471 -0
  182. neverlib/.history/audio_aug/audio_aug_20250827164703.py +471 -0
  183. neverlib/.history/audio_aug/audio_aug_20250827164749.py +471 -0
  184. neverlib/.history/audio_aug/audio_aug_20250827165252.py +472 -0
  185. neverlib/.history/audio_aug/audio_aug_20250827165334.py +472 -0
  186. neverlib/.history/audio_aug/audio_aug_20250827165404.py +473 -0
  187. neverlib/.history/audio_aug/audio_aug_20250827165610.py +473 -0
  188. neverlib/.history/audio_aug/audio_aug_20250827165805.py +473 -0
  189. neverlib/.history/audio_aug/audio_aug_20250827170056.py +473 -0
  190. neverlib/.history/audio_aug/audio_aug_20250827170106.py +472 -0
  191. neverlib/.history/audio_aug/audio_aug_20250827170143.py +472 -0
  192. neverlib/.history/audio_aug/audio_aug_20250827170216.py +472 -0
  193. neverlib/.history/audio_aug/audio_aug_20250827170218.py +472 -0
  194. neverlib/.history/audio_aug/audio_aug_20250827170314.py +472 -0
  195. neverlib/.history/audio_aug/audio_aug_20250827171500.py +471 -0
  196. neverlib/.history/audio_aug/audio_aug_20250827172347.py +471 -0
  197. neverlib/.history/audio_aug/audio_aug_20250827172558.py +470 -0
  198. neverlib/.history/audio_aug/audio_aug_20250827172559.py +470 -0
  199. neverlib/.history/audio_aug/audio_aug_20250827172801.py +470 -0
  200. neverlib/.history/audio_aug/audio_aug_20250827182522.py +470 -0
  201. neverlib/.history/audio_aug/audio_aug_20250827182526.py +470 -0
  202. neverlib/.history/audio_aug/audio_aug_20250827182626.py +470 -0
  203. neverlib/.history/audio_aug/audio_aug_20250827182715.py +470 -0
  204. neverlib/.history/audio_aug/audio_aug_20250904185444.py +470 -0
  205. neverlib/.history/audio_aug/audio_aug_20250904185538.py +445 -0
  206. neverlib/.history/data_analyze/__init___20250806204158.py +14 -0
  207. neverlib/.history/data_analyze/__init___20250827163248.py +14 -0
  208. neverlib/.history/filter/auto_eq/freq_eq_20250821143140.py +76 -0
  209. neverlib/.history/filter/auto_eq/freq_eq_20250821153208.py +76 -0
  210. neverlib/.history/filter/auto_eq/freq_eq_20250821153214.py +76 -0
  211. neverlib/.history/filter/auto_eq/ga_eq_basic_20250901110521.py +385 -0
  212. neverlib/.history/filter/auto_eq/ga_eq_basic_20250901110652.py +385 -0
  213. neverlib/.history/filter/common_20250806002134.py +37 -0
  214. neverlib/.history/filter/common_20250821120448.py +49 -0
  215. neverlib/.history/filter/common_20250821120453.py +49 -0
  216. neverlib/.history/metrics/snr_20250827224201.py +182 -0
  217. neverlib/.history/metrics/snr_20250827234019.py +186 -0
  218. neverlib/.history/metrics/snr_20250827234028.py +186 -0
  219. neverlib/.history/metrics/snr_20250827234030.py +186 -0
  220. neverlib/.history/utils/audio_split_20250805234209.py +268 -0
  221. neverlib/.history/utils/audio_split_20250904185309.py +268 -0
  222. neverlib/.history/utils/utils_20250813165516.py +330 -0
  223. neverlib/.history/utils/utils_20250904181341.py +328 -0
  224. neverlib/.history/utils/utils_20250904185546.py +352 -0
  225. neverlib/.history/utils/utils_20250904185548.py +353 -0
  226. neverlib/.history/utils/utils_20250904185603.py +353 -0
  227. neverlib/.history/utils/utils_20250904185636.py +353 -0
  228. neverlib/.history/utils/utils_20250904185658.py +358 -0
  229. neverlib/.history/utils/utils_20250904190053.py +359 -0
  230. neverlib/.specstory/history/2025-08-22_02-10Z-/345/256/214/345/226/204/345/207/275/346/225/260/347/232/204/345/212/237/350/203/275/345/222/214/345/217/230/351/207/217/345/220/215/345/273/272/350/256/256.md +247 -0
  231. neverlib/.specstory/history/2025-08-26_11-54Z-oserror-missing-shared-object-file.md +87 -0
  232. neverlib/.specstory/history/2025-08-27_08-07Z-/345/256/214/345/226/204/346/265/213/350/257/225/346/226/207/346/241/243/347/232/204/350/256/250/350/256/272.md +296 -0
  233. neverlib/.specstory/history/2025-08-27_08-29Z-delete-python-file-command.md +211 -0
  234. neverlib/.specstory/history/2025-08-27_09-05Z-/345/234/250jupyter/344/270/255/346/222/255/346/224/276/351/237/263/351/242/221/347/232/204/344/273/243/347/240/201/344/277/256/346/224/271.md +357 -0
  235. neverlib/Docs/audio_aug_test/test_snr.py +55 -0
  236. neverlib/Docs/audio_aug_test/test_volume.py +0 -0
  237. neverlib/QA/html2markdown.py +27 -0
  238. neverlib/__init__.py +10 -20
  239. neverlib/audio_aug/__init__.py +6 -1
  240. neverlib/audio_aug/audio_aug.py +360 -55
  241. neverlib/data_analyze/__init__.py +8 -2
  242. neverlib/data_analyze/temporal_features.py +1 -1
  243. neverlib/filter/__init__.py +9 -3
  244. neverlib/filter/auto_eq/freq_eq.py +1 -1
  245. neverlib/filter/auto_eq/ga_eq_basic.py +3 -3
  246. neverlib/filter/common.py +12 -0
  247. neverlib/metrics/snr.py +5 -3
  248. neverlib/utils/__init__.py +14 -7
  249. neverlib/utils/lazy_module.py +81 -0
  250. neverlib/utils/message.py +3 -8
  251. neverlib/utils/utils.py +32 -3
  252. neverlib/vad/__init__.py +16 -9
  253. neverlib/vad/utils.py +20 -6
  254. {neverlib-0.2.4.dist-info → neverlib-0.2.6.dist-info}/METADATA +21 -17
  255. neverlib-0.2.6.dist-info/RECORD +467 -0
  256. neverlib-0.2.4.dist-info/RECORD +0 -229
  257. /neverlib/{Docs/audio_aug/test_snr.py → .history/Docs/audio_aug/test_snr_20250827162033.py} +0 -0
  258. {neverlib-0.2.4.dist-info → neverlib-0.2.6.dist-info}/WHEEL +0 -0
  259. {neverlib-0.2.4.dist-info → neverlib-0.2.6.dist-info}/licenses/LICENSE +0 -0
  260. {neverlib-0.2.4.dist-info → neverlib-0.2.6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,353 @@
1
+ # -*- coding:utf-8 -*-
2
+ # Author:凌逆战 | Never
3
+ # Date: 2023/9/25
4
+ """
5
+ folder处理
6
+ """
7
+ import os
8
+ import random
9
+ import shutil
10
+ import fnmatch
11
+ from tqdm import tqdm
12
+ from datetime import datetime
13
+ import soundfile as sf
14
+ import numpy as np
15
+ EPS = np.finfo(float).eps
16
+
17
+
18
def get_path_list(source_path, end="*.wav", shuffle=False):
    """Recursively collect the files under ``source_path`` matching a pattern.

    Args:
        source_path: Root directory to walk.
        end: ``fnmatch``-style pattern applied to each file name
            (not to the full path).
        shuffle: When true, shuffle the result in place with the global
            ``random`` module before returning it.

    Returns:
        A list of matching file paths, each joined onto the directory in
        which ``os.walk`` found it.
    """
    matched_paths = [
        os.path.join(folder, file_name)
        for folder, _subdirs, file_names in os.walk(source_path)
        # Keep only the names in this folder that match the pattern.
        for file_name in fnmatch.filter(file_names, end)
    ]

    # Only report when LOCAL_RANK is unset or "0", so the summary is printed
    # once rather than by every process that sets a different LOCAL_RANK.
    local_rank = os.environ.get("LOCAL_RANK", "0")
    if local_rank == "0":
        print(source_path, len(matched_paths))

    if shuffle:
        random.shuffle(matched_paths)
    return matched_paths
29
+
30
+
31
def get_audio_segments(wav_len, wav_path_list, sr=16000, insert_silence=None):
    """Build an audio clip of ``wav_len`` frames from randomly chosen files.

    Files are drawn (with replacement) from ``wav_path_list`` and appended
    until at least ``wav_len`` frames have been collected; a random window
    of exactly ``wav_len`` frames is then cut from the concatenation.

    Args:
        wav_len: Required length of the returned audio, in frames.
        wav_path_list: Candidate audio file paths to draw from.
        sr: Sample rate every file is expected to have.
        insert_silence: Accepted but not used by this function.

    Returns:
        A float32 array read with ``always_2d=True`` and concatenated along
        axis 0, holding ``wav_len`` rows.

    Raises:
        AssertionError: If a chosen file's sample rate differs from ``sr``.
    """
    # NOTE(review): if every candidate file is empty this loop never ends,
    # and wav_len <= 0 leaves nothing to concatenate -- confirm callers
    # never hit either case.
    pieces = []
    collected = 0
    while collected < wav_len:
        chosen_path = random.choice(wav_path_list)
        samples, file_sr = sf.read(chosen_path, always_2d=True, dtype='float32')
        assert file_sr == sr, f"音频采样率是{file_sr}, 期望{sr}"
        pieces.append(samples)
        collected += len(samples)

    joined = np.concatenate(pieces, axis=0)
    surplus = len(joined) - wav_len
    if surplus <= 0:
        return joined

    # Too long: keep a randomly positioned window of the requested length.
    offset = random.randint(0, surplus)
    return joined[offset:offset + wav_len, :]
54
+
55
+
56
def rename_files_and_folders(directory, replace='_-', replacement='_'):
    """Rename every file and folder under ``directory`` whose name contains ``replace``.

    Each occurrence of ``replace`` in a file or folder name is substituted
    with ``replacement``. ``directory`` itself is never renamed.

    Args:
        directory: Root directory whose contents are renamed recursively.
        replace: Substring to look for in file and folder names.
        replacement: Text substituted for every occurrence of ``replace``.
    """
    # Walk bottom-up. In a top-down walk, renaming a folder on disk makes
    # os.walk try to descend into the old (now missing) name, so everything
    # inside a renamed folder would be silently skipped. Bottom-up, a folder
    # is renamed only after its contents have already been handled.
    for root, dirs, files in os.walk(directory, topdown=False):
        for kind, names in (("file", files), ("folder", dirs)):
            for name in names:
                if replace not in name:
                    continue
                old_path = os.path.join(root, name)
                new_path = os.path.join(root, name.replace(replace, replacement))
                os.rename(old_path, new_path)
                print(f'Renamed {kind}: {old_path} -> {new_path}')
74
+
75
+
76
def get_file_time(file_path):
    """Return the last-modification time of ``file_path`` as a ``datetime``.

    The modification timestamp is read with ``os.path.getmtime`` and
    converted with ``datetime.fromtimestamp`` (no timezone is attached).
    """
    return datetime.fromtimestamp(os.path.getmtime(file_path))
86
+
87
+
88
def TrainValSplit(dataset_dir, train_dir, val_dir, percentage=0.9):
    """Split the ``*.wav`` files of a dataset into a training and a validation set.

    The files under ``dataset_dir`` are collected in shuffled order, the
    first ``percentage`` of them are copied below ``train_dir`` and the rest
    below ``val_dir``. Each file keeps its path relative to ``dataset_dir``.

    :param dataset_dir: source dataset directory
    :param train_dir: destination directory for the training set
    :param val_dir: destination directory for the validation set
    :param percentage: fraction of the files assigned to the training set
    """
    wav_path_list = get_path_list(dataset_dir, end="*.wav", shuffle=True)
    # Index separating the training files from the validation files.
    split_idx = int(len(wav_path_list) * percentage)

    copy_plan = (
        (wav_path_list[:split_idx], train_dir, "Copying train wav"),
        (wav_path_list[split_idx:], val_dir, "Copying val wav"),
    )
    for paths, target_root, desc in copy_plan:
        for wav_path in tqdm(paths, desc=desc):
            # Re-root via the relative path instead of str.replace, which
            # would rewrite every occurrence of dataset_dir inside the path
            # (e.g. "data/data_01/x.wav") and mishandle a trailing slash.
            relative_path = os.path.relpath(wav_path, dataset_dir)
            target_path = os.path.join(target_root, relative_path)
            target_parent = os.path.dirname(target_path)
            if target_parent:
                os.makedirs(target_parent, exist_ok=True)
            shutil.copy(wav_path, target_path)

    print("Done!")
114
+
115
+
116
def TrainValTestSplit(dataset_dir, train_dir, val_dir, test_dir, percentage=(0.8, 0.1, 0.1)):
    """Split the ``*.wav`` files of a dataset into training, validation and test sets.

    The files under ``dataset_dir`` are sorted, shuffled with a fixed seed
    (so the split is reproducible) and copied below the three destination
    directories. Each file keeps its path relative to ``dataset_dir``.

    :param dataset_dir: source dataset directory
    :param train_dir: destination directory for the training set
    :param val_dir: destination directory for the validation set
    :param test_dir: destination directory for the test set
    :param percentage: three ratios (train, val, test) that must sum to 1.0;
        the test set receives whatever remains after the first two
    :raises AssertionError: if the ratios do not sum to 1.0
    """
    import math  # local import: the module header does not import math

    # Compare with a tolerance: sum([0.7, 0.2, 0.1]) is 0.9999999999999999,
    # which an exact == 1.0 check would wrongly reject.
    assert math.isclose(sum(percentage), 1.0, abs_tol=1e-9), "百分比总和必须等于1.0"

    wav_path_list = sorted(get_path_list(dataset_dir, end="*.wav"))
    # A private generator with the fixed seed yields the same order as
    # seeding the global one, without resetting the caller's random state.
    random.Random(10086).shuffle(wav_path_list)
    total_wav_num = len(wav_path_list)

    # Indices separating train / val / test.
    train_split_idx = int(total_wav_num * percentage[0])
    val_split_idx = train_split_idx + int(total_wav_num * percentage[1])

    train_path_list = wav_path_list[:train_split_idx]
    val_path_list = wav_path_list[train_split_idx:val_split_idx]
    test_path_list = wav_path_list[val_split_idx:]

    copy_plan = (
        (train_path_list, train_dir, "复制训练集音频"),
        (val_path_list, val_dir, "复制验证集音频"),
        (test_path_list, test_dir, "复制测试集音频"),
    )
    for paths, target_root, desc in copy_plan:
        for wav_path in tqdm(paths, desc=desc):
            # Re-root via the relative path instead of str.replace, which
            # would rewrite every occurrence of dataset_dir inside the path.
            relative_path = os.path.relpath(wav_path, dataset_dir)
            target_path = os.path.join(target_root, relative_path)
            target_parent = os.path.dirname(target_path)
            if target_parent:
                os.makedirs(target_parent, exist_ok=True)
            shutil.copy(wav_path, target_path)

    print(f"完成! 训练集: {len(train_path_list)}个文件, 验证集: {len(val_path_list)}个文件, 测试集: {len(test_path_list)}个文件")
158
+
159
+
160
def get_leaf_folders(directory):
    """Return every folder under ``directory`` that has no sub-folders.

    ``directory`` itself is included when it contains no sub-folders.
    """
    return [
        folder
        for folder, subfolders, _files in os.walk(directory)
        if len(subfolders) == 0
    ]
167
+
168
+
169
def del_empty_folders(path):
    """Recursively delete empty folders, children before their parents.

    ``path`` itself is removed too if it ends up empty. Nothing happens when
    ``path`` is not a directory.
    """
    if os.path.isdir(path):
        # Clean the sub-folders first so that a folder holding only empty
        # folders becomes empty itself.
        for entry in os.listdir(path):
            child = os.path.join(path, entry)
            if os.path.isdir(child):
                del_empty_folders(child)

        # Remove this folder once nothing is left inside it.
        if len(os.listdir(path)) == 0:
            os.rmdir(path)
            print(f"删除空文件夹: {path}")
185
+
186
+
187
def DatasetSubfloderSplit(source_dir, split_dirs, percentage=None):
    """Split a dataset into train/val(/test) sets at leaf-folder granularity.

    The leaf folders of ``source_dir`` (folders without sub-folders) are
    sorted, shuffled with a fixed seed and distributed over the target
    directories; each leaf folder is copied as a whole and keeps its path
    relative to ``source_dir``.

    Args:
        source_dir (str): Source dataset directory.
        split_dirs (list): Target directories, either
            ``[train_dir, val_dir]`` or ``[train_dir, val_dir, test_dir]``.
        percentage (list, optional): Split ratios, e.g. ``[0.9, 0.1]`` or
            ``[0.8, 0.1, 0.1]``. Defaults to ``None``, which selects
            ``[0.9, 0.1]`` for a two-way split and ``[0.8, 0.1, 0.1]``
            otherwise.

    Raises:
        ValueError: If ``split_dirs`` does not hold 2 or 3 entries, if the
            number of ratios differs from the number of target directories,
            or if the ratios do not sum to 1.0.

    Example:
        # Two-way split
        DatasetSubfloderSplit(
            source_dir=source_dataset_path,
            split_dirs=[target_train_path, target_val_path],
            percentage=[0.9, 0.1]
        )

        # Three-way split
        DatasetSubfloderSplit(
            source_dir=source_dataset_path,
            split_dirs=[target_train_path, target_val_path, target_test_path],
            percentage=[0.8, 0.1, 0.1]
        )

        # Two-way split with the default ratios
        DatasetSubfloderSplit(
            source_dir=source_dataset_path,
            split_dirs=[target_train_path, target_val_path]
        )
    """
    import math  # local import: the module header does not import math

    if percentage is None:
        percentage = [0.9, 0.1] if len(split_dirs) == 2 else [0.8, 0.1, 0.1]

    # Validate the arguments.
    if len(split_dirs) not in [2, 3]:
        raise ValueError("只支持2路或3路分割(训练集/验证集 或 训练集/验证集/测试集)")
    if len(percentage) != len(split_dirs):
        raise ValueError("分割比例数量必须与目标目录数量相同")
    # Compare with a tolerance: sum([0.7, 0.2, 0.1]) is 0.9999999999999999,
    # which an exact != 1.0 check would wrongly reject.
    if not math.isclose(sum(percentage), 1.0, abs_tol=1e-9):
        raise ValueError("分割比例之和必须等于1.0")

    # Collect the leaf folders and shuffle them reproducibly. A private
    # generator with the fixed seed yields the same order as seeding the
    # global one, without resetting the caller's random state.
    leaf_folder_list = sorted(get_leaf_folders(source_dir))
    random.Random(10086).shuffle(leaf_folder_list)
    total_folder_num = len(leaf_folder_list)

    # Cumulative split points; the last ratio needs none because the final
    # split takes whatever remains.
    # NOTE(review): the running sum is a float, so a boundary such as
    # 0.7 + 0.2 can land just below the intended value and floor one lower.
    split_indices = []
    acc_percentage = 0
    for p in percentage[:-1]:
        acc_percentage += p
        split_indices.append(int(total_folder_num * acc_percentage))

    # Slice the folder list at the split points.
    split_folder_lists = []
    start_idx = 0
    for end_idx in split_indices:
        split_folder_lists.append(leaf_folder_list[start_idx:end_idx])
        start_idx = end_idx
    split_folder_lists.append(leaf_folder_list[start_idx:])

    # Copy each folder below its target directory.
    split_names = ['train', 'val', 'test'][:len(split_dirs)]
    for folders, target_dir, split_name in zip(split_folder_lists, split_dirs, split_names):
        for folder in tqdm(folders, desc=f"Copying {split_name} folders"):
            # Re-root via the relative path instead of str.replace, which
            # would rewrite every occurrence of source_dir inside the path.
            # normpath collapses the "." produced when source_dir is itself
            # the only leaf folder.
            relative_path = os.path.relpath(folder, source_dir)
            target_folder = os.path.normpath(os.path.join(target_dir, relative_path))
            target_parent = os.path.dirname(target_folder)
            if target_parent:
                os.makedirs(target_parent, exist_ok=True)
            shutil.copytree(folder, target_folder)

    # Print the statistics.
    print(f"Total folders: {total_folder_num}")
    for folders, split_name in zip(split_folder_lists, split_names):
        print(f"{split_name.capitalize()} folders: {len(folders)}")
261
+
262
+
263
def pcm2wav(pcm_path, wav_path, sr=16000, channels=1, subtype='PCM_16'):
    """Convert a raw PCM file into a WAV file.

    The PCM file is read as a flat stream of int16 values and reshaped to
    ``channels`` columns before being written.

    :param pcm_path: path of the input PCM file
    :param wav_path: path of the output WAV file
    :param sr: sample rate written to the WAV file
    :param channels: number of channels (columns) in the PCM data
    :param subtype: subtype passed to ``sf.write``
    """
    # One row per frame, one column per channel (multi-channel support).
    frames = np.fromfile(pcm_path, dtype=np.int16).reshape(-1, channels)
    sf.write(wav_path, frames, sr, subtype=subtype)
275
+
276
+
277
def wav2pcm(wav_path, pcm_path):
    """Convert a WAV file into a raw PCM file.

    The audio is read as int16 and the array is dumped to ``pcm_path`` with
    ``tofile``; the sample rate returned by the reader is discarded.

    :param wav_path: path of the input WAV file
    :param pcm_path: path of the output PCM file
    """
    samples = sf.read(wav_path, dtype='int16')[0]
    samples.tofile(pcm_path)
285
+
286
+
287
def _save_value_histogram(values, title, out_path, bins):
    """Plot a histogram of ``values`` with its summary statistics and save it to ``out_path``."""
    import matplotlib.pyplot as plt

    # Build the statistics label line by line. A backslash-continued string
    # literal would embed the source indentation in the rendered text.
    stats_text = "\n".join((
        f"max: {values.max():.2f}",
        f"min: {values.min():.2f}",
        f"mean: {values.mean():.2f}",
        f"std: {values.std():.2f}",
    ))
    try:
        plt.title(title)
        plt.xlabel("value")
        plt.ylabel("count")
        plt.grid(alpha=0.5)
        # Anchor the statistics in the top-right corner (axes coordinates).
        plt.text(1, 1, stats_text, ha='right', va='top', transform=plt.gca().transAxes)
        plt.hist(values, bins=bins)
        plt.savefig(out_path)
    finally:
        # Always release the figure so a failure cannot leak into later plots.
        plt.close()


def save_weight_histogram(model, save_dir, mode=("params", "buffers"), ignore_name=("scale", "bias"), bins=100):
    """Save one value-distribution histogram per model parameter and/or buffer.

    WARNING: if ``save_dir`` already exists it is deleted recursively before
    anything is written.

    Args:
        model: Object exposing ``named_parameters()`` and ``named_buffers()``
            (a PyTorch model).
        save_dir: Output directory. Parameter plots go to ``save_dir/param``
            and buffer plots to ``save_dir/buffer``, one ``<name>.png`` each.
        mode: Which groups to plot; may contain ``"params"`` and/or
            ``"buffers"``.
        ignore_name: Parameters whose name contains any of these substrings
            are skipped. This filter is not applied to buffers.
        bins: Number of histogram bins.
    """
    # Start from a clean output directory.
    if os.path.exists(save_dir):
        shutil.rmtree(save_dir)

    if "params" in mode:
        param_dir = os.path.join(save_dir, "param")
        os.makedirs(param_dir, exist_ok=True)
        for name, param in model.named_parameters():
            if any(ignore in name for ignore in ignore_name):
                continue
            values = param.cpu().data.flatten().numpy()
            _save_value_histogram(values, name, os.path.join(param_dir, f"{name}.png"), bins)

    if "buffers" in mode:
        buffer_dir = os.path.join(save_dir, "buffer")
        os.makedirs(buffer_dir, exist_ok=True)
        for name, buffer in model.named_buffers():
            # Only running statistics are plotted; other buffers are skipped.
            if "running_mean" not in name and "running_var" not in name:
                continue
            values = buffer.cpu().data.flatten().numpy()
            _save_value_histogram(values, name, os.path.join(buffer_dir, f"{name}.png"), bins)