neverlib 0.2.2__py3-none-any.whl → 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (236)
  1. neverlib/.history/Docs/audio_aug/test_snr_20250806011311.py +0 -0
  2. neverlib/.history/Docs/audio_aug/test_snr_20250806011331.py +75 -0
  3. neverlib/.history/Docs/audio_aug/test_snr_20250806011342.py +57 -0
  4. neverlib/.history/Docs/audio_aug/test_snr_20250806011352.py +57 -0
  5. neverlib/.history/Docs/audio_aug/test_snr_20250806011403.py +57 -0
  6. neverlib/.history/Docs/audio_aug/test_snr_20250806011413.py +57 -0
  7. neverlib/.history/Docs/audio_aug/test_snr_20250806011435.py +55 -0
  8. neverlib/.history/Docs/vad/1_20250810032405.py +0 -0
  9. neverlib/.history/Docs/vad/1_20250810032417.py +39 -0
  10. neverlib/.history/audio_aug/audio_aug_20250806010451.py +125 -0
  11. neverlib/.history/audio_aug/audio_aug_20250806010750.py +138 -0
  12. neverlib/.history/audio_aug/audio_aug_20250806010759.py +140 -0
  13. neverlib/.history/audio_aug/audio_aug_20250806010803.py +140 -0
  14. neverlib/.history/audio_aug/audio_aug_20250806010809.py +140 -0
  15. neverlib/.history/audio_aug/audio_aug_20250806011108.py +140 -0
  16. neverlib/.history/dataAnalyze/__init___20250805234204.py +87 -0
  17. neverlib/.history/dataAnalyze/__init___20250806204125.py +14 -0
  18. neverlib/.history/dataAnalyze/__init___20250806204139.py +14 -0
  19. neverlib/.history/dataAnalyze/__init___20250806204159.py +14 -0
  20. neverlib/.history/filter/__init___20250820103351.py +70 -0
  21. neverlib/.history/filter/__init___20250821102348.py +70 -0
  22. neverlib/.history/filter/__init___20250821102405.py +14 -0
  23. neverlib/.history/filter/auto_eq/__init___20250819213121.py +36 -0
  24. neverlib/.history/filter/auto_eq/__init___20250821102241.py +36 -0
  25. neverlib/.history/filter/auto_eq/__init___20250821102259.py +36 -0
  26. neverlib/.history/filter/auto_eq/__init___20250821102307.py +36 -0
  27. neverlib/.history/filter/auto_eq/__init___20250821102310.py +36 -0
  28. neverlib/.history/filter/auto_eq/__init___20250821102318.py +36 -0
  29. neverlib/.history/filter/auto_eq/__init___20250821102507.py +36 -0
  30. neverlib/.history/filter/auto_eq/de_eq_20250820103848.py +361 -0
  31. neverlib/.history/filter/auto_eq/de_eq_20250821102422.py +360 -0
  32. neverlib/.history/filter/auto_eq/freq_eq_20250805234206.py +75 -0
  33. neverlib/.history/filter/auto_eq/freq_eq_20250820140732.py +75 -0
  34. neverlib/.history/filter/auto_eq/freq_eq_20250820140745.py +75 -0
  35. neverlib/.history/filter/auto_eq/freq_eq_20250820140816.py +75 -0
  36. neverlib/.history/filter/auto_eq/freq_eq_20250820140938.py +77 -0
  37. neverlib/.history/filter/auto_eq/freq_eq_20250820141003.py +77 -0
  38. neverlib/.history/filter/auto_eq/freq_eq_20250820141006.py +77 -0
  39. neverlib/.history/filter/auto_eq/freq_eq_20250820141019.py +77 -0
  40. neverlib/.history/filter/auto_eq/freq_eq_20250820141049.py +77 -0
  41. neverlib/.history/filter/auto_eq/freq_eq_20250820141211.py +77 -0
  42. neverlib/.history/filter/auto_eq/freq_eq_20250820141227.py +77 -0
  43. neverlib/.history/filter/auto_eq/freq_eq_20250820141311.py +78 -0
  44. neverlib/.history/filter/auto_eq/freq_eq_20250820141340.py +78 -0
  45. neverlib/.history/filter/auto_eq/freq_eq_20250820141712.py +78 -0
  46. neverlib/.history/filter/auto_eq/freq_eq_20250820141733.py +78 -0
  47. neverlib/.history/filter/auto_eq/freq_eq_20250820141755.py +78 -0
  48. neverlib/.history/filter/auto_eq/freq_eq_20250821102434.py +76 -0
  49. neverlib/.history/filter/auto_eq/freq_eq_20250821102500.py +76 -0
  50. neverlib/.history/filter/auto_eq/freq_eq_20250821102502.py +76 -0
  51. neverlib/.history/filter/auto_eq/ga_eq_basic_20250820102957.py +380 -0
  52. neverlib/.history/filter/auto_eq/ga_eq_basic_20250820113054.py +380 -0
  53. neverlib/.history/filter/auto_eq/ga_eq_basic_20250820113150.py +380 -0
  54. neverlib/.history/filter/auto_eq/ga_eq_basic_20250820113520.py +385 -0
  55. neverlib/.history/filter/auto_eq/ga_eq_basic_20250820113525.py +385 -0
  56. neverlib/.history/filter/auto_eq/ga_eq_basic_20250821102212.py +385 -0
  57. neverlib/.history/metrics/dnsmos_20250806001612.py +160 -0
  58. neverlib/.history/metrics/dnsmos_20250815180659.py +160 -0
  59. neverlib/.history/metrics/dnsmos_20250815180701.py +158 -0
  60. neverlib/.history/metrics/dnsmos_20250815181321.py +154 -0
  61. neverlib/.history/metrics/dnsmos_20250815181327.py +154 -0
  62. neverlib/.history/metrics/dnsmos_20250815181331.py +154 -0
  63. neverlib/.history/metrics/dnsmos_20250815181620.py +154 -0
  64. neverlib/.history/metrics/dnsmos_20250815181631.py +154 -0
  65. neverlib/.history/metrics/dnsmos_20250815181742.py +154 -0
  66. neverlib/.history/metrics/dnsmos_20250815181824.py +153 -0
  67. neverlib/.history/metrics/dnsmos_20250815181834.py +153 -0
  68. neverlib/.history/metrics/dnsmos_20250815181922.py +153 -0
  69. neverlib/.history/metrics/dnsmos_20250815182011.py +147 -0
  70. neverlib/.history/metrics/dnsmos_20250815182036.py +144 -0
  71. neverlib/.history/metrics/dnsmos_20250815182936.py +143 -0
  72. neverlib/.history/metrics/dnsmos_20250815182942.py +143 -0
  73. neverlib/.history/metrics/dnsmos_20250815183032.py +137 -0
  74. neverlib/.history/metrics/dnsmos_20250815183101.py +144 -0
  75. neverlib/.history/metrics/dnsmos_20250815183121.py +144 -0
  76. neverlib/.history/metrics/dnsmos_20250815183123.py +143 -0
  77. neverlib/.history/metrics/dnsmos_20250815183214.py +143 -0
  78. neverlib/.history/metrics/dnsmos_20250815183240.py +143 -0
  79. neverlib/.history/metrics/dnsmos_20250815183248.py +144 -0
  80. neverlib/.history/metrics/dnsmos_20250815183407.py +142 -0
  81. neverlib/.history/metrics/dnsmos_20250815183409.py +142 -0
  82. neverlib/.history/metrics/dnsmos_20250815183431.py +142 -0
  83. neverlib/.history/metrics/dnsmos_20250815183507.py +140 -0
  84. neverlib/.history/metrics/dnsmos_20250815183513.py +139 -0
  85. neverlib/.history/metrics/dnsmos_20250815183618.py +139 -0
  86. neverlib/.history/metrics/dnsmos_20250815183709.py +140 -0
  87. neverlib/.history/metrics/dnsmos_20250815183756.py +137 -0
  88. neverlib/.history/metrics/dnsmos_20250815183815.py +128 -0
  89. neverlib/.history/metrics/dnsmos_20250815183827.py +129 -0
  90. neverlib/.history/metrics/dnsmos_20250815183913.py +117 -0
  91. neverlib/.history/metrics/dnsmos_20250815183914.py +117 -0
  92. neverlib/.history/metrics/dnsmos_20250815184003.py +118 -0
  93. neverlib/.history/metrics/dnsmos_20250815184040.py +118 -0
  94. neverlib/.history/metrics/dnsmos_20250815184049.py +118 -0
  95. neverlib/.history/metrics/dnsmos_20250815184104.py +117 -0
  96. neverlib/.history/metrics/dnsmos_20250815184200.py +117 -0
  97. neverlib/.history/metrics/lpc_lsp_metric_20250816015944.py +128 -0
  98. neverlib/.history/metrics/lpc_lsp_metric_20250816020142.py +128 -0
  99. neverlib/.history/metrics/lpc_lsp_metric_20250816020156.py +128 -0
  100. neverlib/.history/metrics/lpc_lsp_metric_20250816020554.py +130 -0
  101. neverlib/.history/metrics/lpc_lsp_metric_20250816020600.py +125 -0
  102. neverlib/.history/metrics/lpc_lsp_metric_20250816020631.py +120 -0
  103. neverlib/.history/metrics/lpc_lsp_metric_20250816020746.py +118 -0
  104. neverlib/.history/metrics/lpc_me_20250816013111.py +0 -0
  105. neverlib/.history/metrics/lpc_me_20250816013129.py +121 -0
  106. neverlib/.history/metrics/lpc_me_20250816015430.py +103 -0
  107. neverlib/.history/metrics/lpc_me_20250816015535.py +96 -0
  108. neverlib/.history/metrics/lpc_me_20250816015542.py +96 -0
  109. neverlib/.history/metrics/lpc_me_20250816015636.py +97 -0
  110. neverlib/.history/metrics/lpc_me_20250816015658.py +104 -0
  111. neverlib/.history/metrics/lpc_me_20250816015703.py +100 -0
  112. neverlib/.history/metrics/lpc_me_20250816015945.py +128 -0
  113. neverlib/.history/metrics/snr_20250806010538.py +177 -0
  114. neverlib/.history/metrics/snr_20250806211634.py +184 -0
  115. neverlib/.history/metrics/spec_20250805234209.py +45 -0
  116. neverlib/.history/metrics/spec_20250816135530.py +11 -0
  117. neverlib/.history/metrics/spec_20250816135654.py +16 -0
  118. neverlib/.history/metrics/spec_20250816135736.py +68 -0
  119. neverlib/.history/metrics/spec_20250816135904.py +75 -0
  120. neverlib/.history/metrics/spec_20250816135921.py +82 -0
  121. neverlib/.history/metrics/spec_20250816140111.py +82 -0
  122. neverlib/.history/metrics/spec_20250816140543.py +136 -0
  123. neverlib/.history/metrics/spec_20250816140559.py +172 -0
  124. neverlib/.history/metrics/spec_20250816140602.py +172 -0
  125. neverlib/.history/metrics/spec_20250816140608.py +172 -0
  126. neverlib/.history/metrics/spec_20250816140654.py +148 -0
  127. neverlib/.history/metrics/spec_20250816140705.py +144 -0
  128. neverlib/.history/metrics/spec_20250816140755.py +138 -0
  129. neverlib/.history/metrics/spec_20250816140823.py +170 -0
  130. neverlib/.history/metrics/spec_20250816140832.py +170 -0
  131. neverlib/.history/metrics/spec_20250816140833.py +170 -0
  132. neverlib/.history/metrics/spec_20250816140922.py +147 -0
  133. neverlib/.history/metrics/spec_20250816141148.py +107 -0
  134. neverlib/.history/metrics/spec_20250816141219.py +123 -0
  135. neverlib/.history/metrics/spec_20250816141732.py +178 -0
  136. neverlib/.history/metrics/spec_20250816141740.py +178 -0
  137. neverlib/.history/metrics/spec_20250816142030.py +178 -0
  138. neverlib/.history/metrics/spec_20250816142107.py +135 -0
  139. neverlib/.history/metrics/spec_20250816142126.py +135 -0
  140. neverlib/.history/metrics/spec_20250816142410.py +135 -0
  141. neverlib/.history/metrics/spec_20250816142415.py +136 -0
  142. neverlib/.history/metrics/spec_metric_20250816135156.py +0 -0
  143. neverlib/.history/metrics/spec_metric_20250816135226.py +5 -0
  144. neverlib/.history/metrics/spec_metric_20250816135227.py +10 -0
  145. neverlib/.history/metrics/spec_metric_20250816135306.py +15 -0
  146. neverlib/.history/metrics/spec_metric_20250816135442.py +31 -0
  147. neverlib/.history/metrics/spec_metric_20250816135448.py +31 -0
  148. neverlib/.history/metrics/spec_metric_20250816135520.py +29 -0
  149. neverlib/.history/metrics/spec_metric_20250816135537.py +63 -0
  150. neverlib/.history/metrics/spec_metric_20250816135653.py +65 -0
  151. neverlib/.history/vad/PreProcess_20250805234211.py +63 -0
  152. neverlib/.history/vad/PreProcess_20250809232455.py +63 -0
  153. neverlib/.history/vad/PreProcess_20250816020725.py +66 -0
  154. neverlib/.history/vad/VAD_Silero_20250805234211.py +50 -0
  155. neverlib/.history/vad/VAD_Silero_20250809232456.py +50 -0
  156. neverlib/.history/vad/VAD_WebRTC_20250805234211.py +61 -0
  157. neverlib/.history/vad/VAD_WebRTC_20250809232456.py +61 -0
  158. neverlib/.history/vad/VAD_funasr_20250805234211.py +54 -0
  159. neverlib/.history/vad/VAD_funasr_20250809232456.py +54 -0
  160. neverlib/.history/vad/VAD_vadlib_20250805234211.py +70 -0
  161. neverlib/.history/vad/VAD_vadlib_20250809232455.py +70 -0
  162. neverlib/.history/vad/VAD_whisper_20250805234211.py +55 -0
  163. neverlib/.history/vad/VAD_whisper_20250809232456.py +55 -0
  164. neverlib/.specstory/.what-is-this.md +69 -0
  165. neverlib/.specstory/history/2025-08-05_17-06Z-/350/277/231/344/270/200/346/255/245/347/232/204/347/233/256/347/232/204/346/230/257/344/273/200/344/271/210.md +424 -0
  166. neverlib/Docs/audio_aug/test_snr.py +55 -0
  167. neverlib/__init__.py +2 -2
  168. neverlib/audio_aug/HarmonicDistortion.py +79 -0
  169. neverlib/audio_aug/TFDrop.py +41 -0
  170. neverlib/audio_aug/TFMask.py +56 -0
  171. neverlib/audio_aug/__init__.py +1 -1
  172. neverlib/audio_aug/audio_aug.py +19 -5
  173. neverlib/audio_aug/clip_aug.py +41 -0
  174. neverlib/audio_aug/coder_aug.py +209 -0
  175. neverlib/audio_aug/coder_aug2.py +118 -0
  176. neverlib/audio_aug/loss_packet_aug.py +103 -0
  177. neverlib/audio_aug/quant_aug.py +78 -0
  178. neverlib/data_analyze/README.md +234 -0
  179. neverlib/data_analyze/__init__.py +14 -0
  180. neverlib/data_analyze/dataset_analyzer.py +590 -0
  181. neverlib/data_analyze/quality_metrics.py +364 -0
  182. neverlib/data_analyze/rms_distrubution.py +62 -0
  183. neverlib/data_analyze/spectral_analysis.py +218 -0
  184. neverlib/data_analyze/statistics.py +406 -0
  185. neverlib/data_analyze/temporal_features.py +126 -0
  186. neverlib/data_analyze/visualization.py +468 -0
  187. neverlib/filter/README.md +101 -0
  188. neverlib/filter/__init__.py +7 -0
  189. neverlib/filter/auto_eq/README.md +165 -0
  190. neverlib/filter/auto_eq/__init__.py +36 -0
  191. neverlib/filter/auto_eq/de_eq.py +360 -0
  192. neverlib/filter/auto_eq/freq_eq.py +76 -0
  193. neverlib/filter/auto_eq/ga_eq_advanced.py +577 -0
  194. neverlib/filter/auto_eq/ga_eq_basic.py +385 -0
  195. neverlib/filter/biquad.py +45 -0
  196. neverlib/filter/common.py +5 -6
  197. neverlib/filter/core.py +339 -0
  198. neverlib/metrics/dnsmos.py +117 -0
  199. neverlib/metrics/lpc_lsp.py +118 -0
  200. neverlib/metrics/snr.py +184 -0
  201. neverlib/metrics/spec.py +136 -0
  202. neverlib/metrics/test_pesq.py +35 -0
  203. neverlib/metrics/time.py +68 -0
  204. neverlib/tests/test_vad.py +21 -0
  205. neverlib/utils/audio_split.py +2 -1
  206. neverlib/utils/message.py +4 -4
  207. neverlib/utils/utils.py +36 -16
  208. neverlib/vad/PreProcess.py +6 -3
  209. neverlib/vad/README.md +10 -10
  210. neverlib/vad/VAD_Energy.py +1 -1
  211. neverlib/vad/VAD_Silero.py +2 -2
  212. neverlib/vad/VAD_WebRTC.py +2 -2
  213. neverlib/vad/VAD_funasr.py +2 -2
  214. neverlib/vad/VAD_statistics.py +3 -3
  215. neverlib/vad/VAD_vadlib.py +3 -3
  216. neverlib/vad/VAD_whisper.py +2 -2
  217. neverlib/vad/__init__.py +1 -1
  218. neverlib/vad/class_get_speech.py +4 -4
  219. neverlib/vad/class_vad.py +1 -1
  220. neverlib/vad/utils.py +47 -5
  221. {neverlib-0.2.2.dist-info → neverlib-0.2.4.dist-info}/METADATA +120 -120
  222. neverlib-0.2.4.dist-info/RECORD +229 -0
  223. {neverlib-0.2.2.dist-info → neverlib-0.2.4.dist-info}/WHEEL +1 -1
  224. neverlib/Documents/vad/VAD_Energy.ipynb +0 -159
  225. neverlib/Documents/vad/VAD_Silero.ipynb +0 -305
  226. neverlib/Documents/vad/VAD_WebRTC.ipynb +0 -183
  227. neverlib/Documents/vad/VAD_funasr.ipynb +0 -179
  228. neverlib/Documents/vad/VAD_ppasr.ipynb +0 -175
  229. neverlib/Documents/vad/VAD_statistics.ipynb +0 -522
  230. neverlib/Documents/vad/VAD_vadlib.ipynb +0 -184
  231. neverlib/Documents/vad/VAD_whisper.ipynb +0 -430
  232. neverlib/utils/waveform_analyzer.py +0 -51
  233. neverlib/wav_data/000_short.wav +0 -0
  234. neverlib-0.2.2.dist-info/RECORD +0 -40
  235. {neverlib-0.2.2.dist-info → neverlib-0.2.4.dist-info}/licenses/LICENSE +0 -0
  236. {neverlib-0.2.2.dist-info → neverlib-0.2.4.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,144 @@
1
+ '''
2
+ Author: 凌逆战 | Never
3
+ Date: 2025-08-06 10:00:00
4
+ Description:
5
+ 要计算个性化 MOS 分数(干扰说话者受到惩罚),请提供“-p”参数,例如:python dnsmos.py -t ./SampleClips -o sample.csv -p
6
+ 要计算常规 MOS 分数,请省略“-p”参数。例如:python dnsmos.py -t ./SampleClips -o sample.csv
7
+ '''
8
+ import argparse
9
+ import concurrent.futures
10
+ import glob
11
+ import os
12
+ import librosa
13
+ import numpy as np
14
+ import onnxruntime as ort
15
+ import pandas as pd
16
+ import soundfile as sf
17
+ from tqdm import tqdm
18
+ from neverlib.utils import get_path_list
19
+
20
+ SAMPLING_RATE = 16000
21
+ INPUT_LENGTH = 9.01
22
+
23
+
24
class ComputeScore:
    """Callable scorer that runs two ONNX models over fixed-length windows of a clip.

    The primary model's output is unpacked as (SIG, BAK, OVRL); the second
    model produces the P.808 MOS. Window length is taken from the module-level
    ``INPUT_LENGTH`` constant (seconds).
    """

    def __init__(self, primary_model_path, p808_model_path) -> None:
        """Create one ONNX inference session per model.

        Args:
            primary_model_path: Path of the ONNX model yielding SIG/BAK/OVRL.
            p808_model_path: Path of the ONNX model yielding the P.808 MOS.
        """
        self.onnx_sess = ort.InferenceSession(primary_model_path)
        self.p808_onnx_sess = ort.InferenceSession(p808_model_path)

    def audio_melspec(self, audio, n_mels=120, frame_size=320, hop_length=160, sr=16000, to_db=True):
        """Return the mel spectrogram of ``audio`` with time as the first axis.

        Args:
            audio: Waveform samples passed to ``librosa.feature.melspectrogram``.
            n_mels: Number of mel bands.
            frame_size: The FFT size used is ``frame_size + 1``.
            hop_length: Hop between frames, in samples.
            sr: Sampling rate handed to librosa.
            to_db: When true, convert power to dB relative to the maximum and
                rescale with ``(dB + 40) / 40``.

        Returns:
            The transposed spectrogram (``mel_spec.T``).
        """
        mel_spec = librosa.feature.melspectrogram(
            y=audio, sr=sr, n_fft=frame_size + 1, hop_length=hop_length, n_mels=n_mels)
        if to_db:
            mel_spec = (librosa.power_to_db(mel_spec, ref=np.max) + 40) / 40
        return mel_spec.T

    def get_polyfit_val(self, sig, bak, ovr, is_personalized_MOS):
        """Map raw model outputs through fixed calibration polynomials.

        Args:
            sig: Raw SIG value.
            bak: Raw BAK value.
            ovr: Raw OVRL value.
            is_personalized_MOS: Selects the cubic (personalized) coefficient
                set when true, otherwise the quadratic set.

        Returns:
            Tuple ``(sig_poly, bak_poly, ovr_poly)``.
        """
        if is_personalized_MOS:
            p_ovr = np.poly1d([-0.00533021, 0.005101, 1.18058466, -0.11236046])
            p_sig = np.poly1d([-0.01019296, 0.02751166, 1.19576786, -0.24348726])
            p_bak = np.poly1d([-0.04976499, 0.44276479, -0.1644611, 0.96883132])
        else:
            p_ovr = np.poly1d([-0.06766283, 1.11546468, 0.04602535])
            p_sig = np.poly1d([-0.08397278, 1.22083953, 0.0052439])
            p_bak = np.poly1d([-0.13166888, 1.60915514, -0.39604546])

        sig_poly, bak_poly, ovr_poly = p_sig(sig), p_bak(bak), p_ovr(ovr)

        return sig_poly, bak_poly, ovr_poly

    @staticmethod
    def _tile_to_min_length(audio, min_len):
        """Repeat ``audio`` (doubling each time) until it has at least ``min_len`` samples.

        Raises:
            ValueError: If ``audio`` is empty and padding is required; doubling
                an empty signal would otherwise loop forever.
        """
        if len(audio) == 0 and min_len > 0:
            raise ValueError("cannot pad an empty audio signal")
        while len(audio) < min_len:
            audio = np.append(audio, audio)
        return audio

    def __call__(self, fpath, sampling_rate, is_personalized_MOS):
        """Score one audio file.

        Args:
            fpath: Path of the audio file, read with ``soundfile``.
            sampling_rate: Rate the audio is resampled to when the file's rate differs.
            is_personalized_MOS: Forwarded to :meth:`get_polyfit_val`.

        Returns:
            dict with keys ``filename``, ``len_in_sec``, ``sr``, ``num_hops``,
            ``OVRL_raw``, ``SIG_raw``, ``BAK_raw``, ``OVRL``, ``SIG``, ``BAK``
            and ``P808_MOS``; score entries are means over all windows.

        Raises:
            ValueError: If the file contains no samples.
        """
        # NOTE(review): sf.read yields 2-D data for multi-channel files; the
        # code below appears to assume a mono signal — confirm with callers.
        aud, input_fs = sf.read(fpath)
        fs = sampling_rate
        if input_fs != fs:
            # Keyword arguments: recent librosa releases reject positional rates.
            audio = librosa.resample(aud, orig_sr=input_fs, target_sr=fs)
        else:
            audio = aud
        actual_audio_len = len(audio)
        len_samples = int(INPUT_LENGTH * fs)
        audio = self._tile_to_min_length(audio, len_samples)

        num_hops = int(np.floor(len(audio) / fs) - INPUT_LENGTH) + 1
        hop_len_samples = fs  # windows advance by one second
        predicted_mos_sig_seg_raw = []
        predicted_mos_bak_seg_raw = []
        predicted_mos_ovr_seg_raw = []
        predicted_mos_sig_seg = []
        predicted_mos_bak_seg = []
        predicted_mos_ovr_seg = []
        predicted_p808_mos = []

        for idx in range(num_hops):
            start = int(idx * hop_len_samples)
            stop = int((idx + INPUT_LENGTH) * hop_len_samples)
            audio_seg = audio[start:stop]
            if len(audio_seg) < len_samples:
                continue

            input_features = np.array(audio_seg).astype('float32')[np.newaxis, :]
            # The P.808 model is fed the mel spectrogram of the window minus its last 160 samples.
            p808_input_features = np.array(
                self.audio_melspec(audio=audio_seg[:-160])).astype('float32')[np.newaxis, :, :]
            oi = {'input_1': input_features}
            p808_oi = {'input_1': p808_input_features}
            p808_mos = self.p808_onnx_sess.run(None, p808_oi)[0][0][0]
            mos_sig_raw, mos_bak_raw, mos_ovr_raw = self.onnx_sess.run(None, oi)[0][0]
            mos_sig, mos_bak, mos_ovr = self.get_polyfit_val(
                mos_sig_raw, mos_bak_raw, mos_ovr_raw, is_personalized_MOS)
            predicted_mos_sig_seg_raw.append(mos_sig_raw)
            predicted_mos_bak_seg_raw.append(mos_bak_raw)
            predicted_mos_ovr_seg_raw.append(mos_ovr_raw)
            predicted_mos_sig_seg.append(mos_sig)
            predicted_mos_bak_seg.append(mos_bak)
            predicted_mos_ovr_seg.append(mos_ovr)
            predicted_p808_mos.append(p808_mos)

        clip_dict = {'filename': fpath, 'len_in_sec': actual_audio_len / fs, 'sr': fs}
        clip_dict['num_hops'] = num_hops
        clip_dict['OVRL_raw'] = np.mean(predicted_mos_ovr_seg_raw)
        clip_dict['SIG_raw'] = np.mean(predicted_mos_sig_seg_raw)
        clip_dict['BAK_raw'] = np.mean(predicted_mos_bak_seg_raw)
        clip_dict['OVRL'] = np.mean(predicted_mos_ovr_seg)
        clip_dict['SIG'] = np.mean(predicted_mos_sig_seg)
        clip_dict['BAK'] = np.mean(predicted_mos_bak_seg)
        clip_dict['P808_MOS'] = np.mean(predicted_p808_mos)
        return clip_dict
101
+
102
+
103
def main(args):
    """Score every clip found under ``args.testset_dir`` and report the results.

    Args:
        args: Parsed command-line namespace providing ``testset_dir``,
            ``csv_path`` and ``personalized_MOS``.

    The ONNX model paths are relative (``DNSMOS/...`` or ``pDNSMOS/...``), so
    they are resolved against the current working directory. Results are
    written to ``args.csv_path`` when given; otherwise summary statistics are
    printed.
    """
    p808_model_path = os.path.join('DNSMOS', 'model_v8.onnx')

    if args.personalized_MOS:
        primary_model_path = os.path.join('pDNSMOS', 'sig_bak_ovr.onnx')
    else:
        primary_model_path = os.path.join('DNSMOS', 'sig_bak_ovr.onnx')

    compute_score = ComputeScore(primary_model_path, p808_model_path)

    is_personalized_eval = args.personalized_MOS
    desired_fs = SAMPLING_RATE

    clips = get_path_list(args.testset_dir, 'wav')

    rows = [compute_score(clip, desired_fs, is_personalized_eval) for clip in tqdm(clips)]

    df = pd.DataFrame(rows)
    if args.csv_path:
        df.to_csv(args.csv_path)
    elif df.empty:
        # DataFrame.describe() raises ValueError on a frame without columns.
        print("No clips were scored; nothing to summarise.")
    else:
        print(df.describe())
132
+
133
+
134
if __name__ == "__main__":
    # Command-line entry point: declare the three options, parse, then hand
    # the resulting namespace to main().
    cli = argparse.ArgumentParser()
    option_specs = (
        (('-t', "--testset_dir"),
         {'default': '.', 'help': '包含要评估的.wav格式音频剪辑的目录的路径'}),
        (('-o', "--csv_path"),
         {'default': None, 'help': '保存结果的csv文件'}),
        (('-p', "--personalized_MOS"),
         {'action': 'store_true', 'help': '标志表明是需要个性化的MOS分数还是常规的'}),
    )
    for flags, options in option_specs:
        cli.add_argument(*flags, **options)

    main(cli.parse_args())
@@ -0,0 +1,143 @@
1
+ '''
2
+ Author: 凌逆战 | Never
3
+ Date: 2025-08-06 10:00:00
4
+ Description:
5
+ 要计算个性化 MOS 分数(干扰说话者受到惩罚),请提供“-p”参数,例如:python dnsmos.py -t ./SampleClips -o sample.csv -p
6
+ 要计算常规 MOS 分数,请省略“-p”参数。例如:python dnsmos.py -t ./SampleClips -o sample.csv
7
+ '''
8
+ import argparse
9
+ import concurrent.futures
10
+ import glob
11
+ import os
12
+ import librosa
13
+ import numpy as np
14
+ import onnxruntime as ort
15
+ import pandas as pd
16
+ import soundfile as sf
17
+ from tqdm import tqdm
18
+ from neverlib.utils import get_path_list
19
+
20
+
21
class ComputeScore:
    """Callable scorer that runs two ONNX models over fixed-length windows of a clip.

    The primary model's output is unpacked as (SIG, BAK, OVRL); the second
    model produces the P.808 MOS.
    """

    def __init__(self, primary_model_path, p808_model_path, sampling_rate, input_length) -> None:
        """Store the configuration and create one ONNX inference session per model.

        Args:
            primary_model_path: Path of the ONNX model yielding SIG/BAK/OVRL.
            p808_model_path: Path of the ONNX model yielding the P.808 MOS.
            sampling_rate: Stored on the instance; ``__call__`` uses the rate
                passed to it rather than this attribute.
            input_length: Window length in seconds.
        """
        self.sampling_rate = sampling_rate
        self.input_length = input_length
        self.onnx_sess = ort.InferenceSession(primary_model_path)
        self.p808_onnx_sess = ort.InferenceSession(p808_model_path)

    def audio_melspec(self, audio, n_mels=120, frame_size=320, hop_length=160, sr=16000, to_db=True):
        """Return the mel spectrogram of ``audio`` with time as the first axis.

        Args:
            audio: Waveform samples passed to ``librosa.feature.melspectrogram``.
            n_mels: Number of mel bands.
            frame_size: The FFT size used is ``frame_size + 1``.
            hop_length: Hop between frames, in samples.
            sr: Sampling rate handed to librosa.
            to_db: When true, convert power to dB relative to the maximum and
                rescale with ``(dB + 40) / 40``.

        Returns:
            The transposed spectrogram (``mel_spec.T``).
        """
        mel_spec = librosa.feature.melspectrogram(
            y=audio, sr=sr, n_fft=frame_size + 1, hop_length=hop_length, n_mels=n_mels)
        if to_db:
            mel_spec = (librosa.power_to_db(mel_spec, ref=np.max) + 40) / 40
        return mel_spec.T

    def get_polyfit_val(self, sig, bak, ovr, is_personalized_MOS):
        """Map raw model outputs through fixed calibration polynomials.

        Args:
            sig: Raw SIG value.
            bak: Raw BAK value.
            ovr: Raw OVRL value.
            is_personalized_MOS: Selects the cubic (personalized) coefficient
                set when true, otherwise the quadratic set.

        Returns:
            Tuple ``(sig_poly, bak_poly, ovr_poly)``.
        """
        if is_personalized_MOS:
            p_ovr = np.poly1d([-0.00533021, 0.005101, 1.18058466, -0.11236046])
            p_sig = np.poly1d([-0.01019296, 0.02751166, 1.19576786, -0.24348726])
            p_bak = np.poly1d([-0.04976499, 0.44276479, -0.1644611, 0.96883132])
        else:
            p_ovr = np.poly1d([-0.06766283, 1.11546468, 0.04602535])
            p_sig = np.poly1d([-0.08397278, 1.22083953, 0.0052439])
            p_bak = np.poly1d([-0.13166888, 1.60915514, -0.39604546])

        sig_poly, bak_poly, ovr_poly = p_sig(sig), p_bak(bak), p_ovr(ovr)

        return sig_poly, bak_poly, ovr_poly

    @staticmethod
    def _tile_to_min_length(audio, min_len):
        """Repeat ``audio`` (doubling each time) until it has at least ``min_len`` samples.

        Raises:
            ValueError: If ``audio`` is empty and padding is required; doubling
                an empty signal would otherwise loop forever.
        """
        if len(audio) == 0 and min_len > 0:
            raise ValueError("cannot pad an empty audio signal")
        while len(audio) < min_len:
            audio = np.append(audio, audio)
        return audio

    def __call__(self, fpath, sampling_rate, is_personalized_MOS):
        """Score one audio file.

        Args:
            fpath: Path of the audio file, read with ``soundfile``.
            sampling_rate: Rate the audio is resampled to when the file's rate differs.
            is_personalized_MOS: Forwarded to :meth:`get_polyfit_val`.

        Returns:
            Tuple ``(OVRL_raw, SIG_raw, BAK_raw, OVRL, SIG, BAK, P808_MOS)``,
            each the mean over all scored windows.

        Raises:
            ValueError: If the file contains no samples.
        """
        # NOTE(review): sf.read yields 2-D data for multi-channel files; the
        # code below appears to assume a mono signal — confirm with callers.
        aud, input_fs = sf.read(fpath)
        fs = sampling_rate
        if input_fs != fs:
            # Keyword arguments: recent librosa releases reject positional rates.
            audio = librosa.resample(aud, orig_sr=input_fs, target_sr=fs)
        else:
            audio = aud
        len_samples = int(self.input_length * fs)
        audio = self._tile_to_min_length(audio, len_samples)

        num_hops = int(np.floor(len(audio) / fs) - self.input_length) + 1
        hop_len_samples = fs  # windows advance by one second
        predicted_mos_sig_seg_raw = []
        predicted_mos_bak_seg_raw = []
        predicted_mos_ovr_seg_raw = []
        predicted_mos_sig_seg = []
        predicted_mos_bak_seg = []
        predicted_mos_ovr_seg = []
        predicted_p808_mos = []

        for idx in range(num_hops):
            start = int(idx * hop_len_samples)
            stop = int((idx + self.input_length) * hop_len_samples)
            audio_seg = audio[start:stop]
            if len(audio_seg) < len_samples:
                continue

            input_features = np.array(audio_seg).astype('float32')[np.newaxis, :]
            # The P.808 model is fed the mel spectrogram of the window minus its last 160 samples.
            p808_input_features = np.array(
                self.audio_melspec(audio=audio_seg[:-160])).astype('float32')[np.newaxis, :, :]
            oi = {'input_1': input_features}
            p808_oi = {'input_1': p808_input_features}
            p808_mos = self.p808_onnx_sess.run(None, p808_oi)[0][0][0]
            mos_sig_raw, mos_bak_raw, mos_ovr_raw = self.onnx_sess.run(None, oi)[0][0]
            mos_sig, mos_bak, mos_ovr = self.get_polyfit_val(
                mos_sig_raw, mos_bak_raw, mos_ovr_raw, is_personalized_MOS)
            predicted_mos_sig_seg_raw.append(mos_sig_raw)
            predicted_mos_bak_seg_raw.append(mos_bak_raw)
            predicted_mos_ovr_seg_raw.append(mos_ovr_raw)
            predicted_mos_sig_seg.append(mos_sig)
            predicted_mos_bak_seg.append(mos_bak)
            predicted_mos_ovr_seg.append(mos_ovr)
            predicted_p808_mos.append(p808_mos)

        OVRL_raw = np.mean(predicted_mos_ovr_seg_raw)
        SIG_raw = np.mean(predicted_mos_sig_seg_raw)
        BAK_raw = np.mean(predicted_mos_bak_seg_raw)
        OVRL = np.mean(predicted_mos_ovr_seg)
        SIG = np.mean(predicted_mos_sig_seg)
        BAK = np.mean(predicted_mos_bak_seg)
        P808_MOS = np.mean(predicted_p808_mos)
        return OVRL_raw, SIG_raw, BAK_raw, OVRL, SIG, BAK, P808_MOS
100
+
101
+
102
def main(args):
    """Score every clip found under ``args.testset_dir`` and report the results.

    Args:
        args: Parsed command-line namespace providing ``testset_dir``,
            ``csv_path`` and ``personalized_MOS``.

    The ONNX model paths are relative (``DNSMOS/...`` or ``pDNSMOS/...``), so
    they are resolved against the current working directory. Results are
    written to ``args.csv_path`` when given; otherwise summary statistics are
    printed.
    """
    SAMPLING_RATE = 16000
    INPUT_LENGTH = 9.01
    p808_model_path = os.path.join('DNSMOS', 'model_v8.onnx')

    if args.personalized_MOS:
        primary_model_path = os.path.join('pDNSMOS', 'sig_bak_ovr.onnx')
    else:
        primary_model_path = os.path.join('DNSMOS', 'sig_bak_ovr.onnx')

    # ComputeScore.__init__ requires the sampling rate and window length too;
    # omitting them raises TypeError before any clip is scored.
    compute_score = ComputeScore(primary_model_path, p808_model_path, SAMPLING_RATE, INPUT_LENGTH)

    is_personalized_eval = args.personalized_MOS
    desired_fs = SAMPLING_RATE

    clips = get_path_list(args.testset_dir, 'wav')

    # Each row is the plain tuple returned by compute_score, so the resulting
    # DataFrame columns are positional (unnamed).
    rows = [compute_score(clip, desired_fs, is_personalized_eval) for clip in tqdm(clips)]

    df = pd.DataFrame(rows)
    if args.csv_path:
        df.to_csv(args.csv_path)
    elif df.empty:
        # DataFrame.describe() raises ValueError on a frame without columns.
        print("No clips were scored; nothing to summarise.")
    else:
        print(df.describe())
131
+
132
+
133
if __name__ == "__main__":
    # Command-line entry point: declare the three options, parse, then hand
    # the resulting namespace to main().
    cli = argparse.ArgumentParser()
    option_specs = (
        (('-t', "--testset_dir"),
         {'default': '.', 'help': '包含要评估的.wav格式音频剪辑的目录的路径'}),
        (('-o', "--csv_path"),
         {'default': None, 'help': '保存结果的csv文件'}),
        (('-p', "--personalized_MOS"),
         {'action': 'store_true', 'help': '标志表明是需要个性化的MOS分数还是常规的'}),
    )
    for flags, options in option_specs:
        cli.add_argument(*flags, **options)

    main(cli.parse_args())
@@ -0,0 +1,143 @@
1
+ '''
2
+ Author: 凌逆战 | Never
3
+ Date: 2025-08-06 10:00:00
4
+ Description:
5
+ 要计算个性化 MOS 分数(干扰说话者受到惩罚),请提供“-p”参数,例如:python dnsmos.py -t ./SampleClips -o sample.csv -p
6
+ 要计算常规 MOS 分数,请省略“-p”参数。例如:python dnsmos.py -t ./SampleClips -o sample.csv
7
+ '''
8
+ import argparse
9
+ import concurrent.futures
10
+ import glob
11
+ import os
12
+ import librosa
13
+ import numpy as np
14
+ import onnxruntime as ort
15
+ import pandas as pd
16
+ import soundfile as sf
17
+ from tqdm import tqdm
18
+ from neverlib.utils import get_path_list
19
+
20
+
21
class ComputeScore:
    """Callable scorer that runs two ONNX models over fixed-length windows of a clip.

    The primary model's output is unpacked as (SIG, BAK, OVRL); the second
    model produces the P.808 MOS.
    """

    def __init__(self, primary_model_path, p808_model_path, sampling_rate, input_length) -> None:
        """Store the configuration and create one ONNX inference session per model.

        Args:
            primary_model_path: Path of the ONNX model yielding SIG/BAK/OVRL.
            p808_model_path: Path of the ONNX model yielding the P.808 MOS.
            sampling_rate: Stored on the instance; ``__call__`` uses the rate
                passed to it rather than this attribute.
            input_length: Window length in seconds.
        """
        self.sampling_rate = sampling_rate
        self.input_length = input_length
        self.onnx_sess = ort.InferenceSession(primary_model_path)
        self.p808_onnx_sess = ort.InferenceSession(p808_model_path)

    def audio_melspec(self, audio, n_mels=120, frame_size=320, hop_length=160, sr=16000, to_db=True):
        """Return the mel spectrogram of ``audio`` with time as the first axis.

        Args:
            audio: Waveform samples passed to ``librosa.feature.melspectrogram``.
            n_mels: Number of mel bands.
            frame_size: The FFT size used is ``frame_size + 1``.
            hop_length: Hop between frames, in samples.
            sr: Sampling rate handed to librosa.
            to_db: When true, convert power to dB relative to the maximum and
                rescale with ``(dB + 40) / 40``.

        Returns:
            The transposed spectrogram (``mel_spec.T``).
        """
        mel_spec = librosa.feature.melspectrogram(
            y=audio, sr=sr, n_fft=frame_size + 1, hop_length=hop_length, n_mels=n_mels)
        if to_db:
            mel_spec = (librosa.power_to_db(mel_spec, ref=np.max) + 40) / 40
        return mel_spec.T

    def get_polyfit_val(self, sig, bak, ovr, is_personalized_MOS):
        """Map raw model outputs through fixed calibration polynomials.

        Args:
            sig: Raw SIG value.
            bak: Raw BAK value.
            ovr: Raw OVRL value.
            is_personalized_MOS: Selects the cubic (personalized) coefficient
                set when true, otherwise the quadratic set.

        Returns:
            Tuple ``(sig_poly, bak_poly, ovr_poly)``.
        """
        if is_personalized_MOS:
            p_ovr = np.poly1d([-0.00533021, 0.005101, 1.18058466, -0.11236046])
            p_sig = np.poly1d([-0.01019296, 0.02751166, 1.19576786, -0.24348726])
            p_bak = np.poly1d([-0.04976499, 0.44276479, -0.1644611, 0.96883132])
        else:
            p_ovr = np.poly1d([-0.06766283, 1.11546468, 0.04602535])
            p_sig = np.poly1d([-0.08397278, 1.22083953, 0.0052439])
            p_bak = np.poly1d([-0.13166888, 1.60915514, -0.39604546])

        sig_poly, bak_poly, ovr_poly = p_sig(sig), p_bak(bak), p_ovr(ovr)

        return sig_poly, bak_poly, ovr_poly

    @staticmethod
    def _tile_to_min_length(audio, min_len):
        """Repeat ``audio`` (doubling each time) until it has at least ``min_len`` samples.

        Raises:
            ValueError: If ``audio`` is empty and padding is required; doubling
                an empty signal would otherwise loop forever.
        """
        if len(audio) == 0 and min_len > 0:
            raise ValueError("cannot pad an empty audio signal")
        while len(audio) < min_len:
            audio = np.append(audio, audio)
        return audio

    def __call__(self, fpath, sampling_rate, is_personalized_MOS):
        """Score one audio file.

        Args:
            fpath: Path of the audio file, read with ``soundfile``.
            sampling_rate: Rate the audio is resampled to when the file's rate differs.
            is_personalized_MOS: Forwarded to :meth:`get_polyfit_val`.

        Returns:
            Tuple ``(OVRL_raw, SIG_raw, BAK_raw, OVRL, SIG, BAK, P808_MOS)``,
            each the mean over all scored windows.

        Raises:
            ValueError: If the file contains no samples.
        """
        # NOTE(review): sf.read yields 2-D data for multi-channel files; the
        # code below appears to assume a mono signal — confirm with callers.
        aud, input_fs = sf.read(fpath)
        fs = sampling_rate
        if input_fs != fs:
            # Keyword arguments: recent librosa releases reject positional rates.
            audio = librosa.resample(aud, orig_sr=input_fs, target_sr=fs)
        else:
            audio = aud
        len_samples = int(self.input_length * fs)
        audio = self._tile_to_min_length(audio, len_samples)

        num_hops = int(np.floor(len(audio) / fs) - self.input_length) + 1
        hop_len_samples = fs  # windows advance by one second
        predicted_mos_sig_seg_raw = []
        predicted_mos_bak_seg_raw = []
        predicted_mos_ovr_seg_raw = []
        predicted_mos_sig_seg = []
        predicted_mos_bak_seg = []
        predicted_mos_ovr_seg = []
        predicted_p808_mos = []

        for idx in range(num_hops):
            start = int(idx * hop_len_samples)
            stop = int((idx + self.input_length) * hop_len_samples)
            audio_seg = audio[start:stop]
            if len(audio_seg) < len_samples:
                continue

            input_features = np.array(audio_seg).astype('float32')[np.newaxis, :]
            # The P.808 model is fed the mel spectrogram of the window minus its last 160 samples.
            p808_input_features = np.array(
                self.audio_melspec(audio=audio_seg[:-160])).astype('float32')[np.newaxis, :, :]
            oi = {'input_1': input_features}
            p808_oi = {'input_1': p808_input_features}
            p808_mos = self.p808_onnx_sess.run(None, p808_oi)[0][0][0]
            mos_sig_raw, mos_bak_raw, mos_ovr_raw = self.onnx_sess.run(None, oi)[0][0]
            mos_sig, mos_bak, mos_ovr = self.get_polyfit_val(
                mos_sig_raw, mos_bak_raw, mos_ovr_raw, is_personalized_MOS)
            predicted_mos_sig_seg_raw.append(mos_sig_raw)
            predicted_mos_bak_seg_raw.append(mos_bak_raw)
            predicted_mos_ovr_seg_raw.append(mos_ovr_raw)
            predicted_mos_sig_seg.append(mos_sig)
            predicted_mos_bak_seg.append(mos_bak)
            predicted_mos_ovr_seg.append(mos_ovr)
            predicted_p808_mos.append(p808_mos)

        OVRL_raw = np.mean(predicted_mos_ovr_seg_raw)
        SIG_raw = np.mean(predicted_mos_sig_seg_raw)
        BAK_raw = np.mean(predicted_mos_bak_seg_raw)
        OVRL = np.mean(predicted_mos_ovr_seg)
        SIG = np.mean(predicted_mos_sig_seg)
        BAK = np.mean(predicted_mos_bak_seg)
        P808_MOS = np.mean(predicted_p808_mos)
        return OVRL_raw, SIG_raw, BAK_raw, OVRL, SIG, BAK, P808_MOS
100
+
101
+
102
def main(args):
    """Score every clip found under ``args.testset_dir`` and report the results.

    Args:
        args: Parsed command-line namespace providing ``testset_dir``,
            ``csv_path`` and ``personalized_MOS``.

    The ONNX model paths are relative (``DNSMOS/...`` or ``pDNSMOS/...``), so
    they are resolved against the current working directory. Results are
    written to ``args.csv_path`` when given; otherwise summary statistics are
    printed.
    """
    SAMPLING_RATE = 16000
    INPUT_LENGTH = 9.01
    p808_model_path = os.path.join('DNSMOS', 'model_v8.onnx')

    if args.personalized_MOS:
        primary_model_path = os.path.join('pDNSMOS', 'sig_bak_ovr.onnx')
    else:
        primary_model_path = os.path.join('DNSMOS', 'sig_bak_ovr.onnx')

    compute_score = ComputeScore(primary_model_path, p808_model_path, SAMPLING_RATE, INPUT_LENGTH)

    is_personalized_eval = args.personalized_MOS
    desired_fs = SAMPLING_RATE

    clips = get_path_list(args.testset_dir, 'wav')

    # Each row is the plain tuple returned by compute_score, so the resulting
    # DataFrame columns are positional (unnamed).
    rows = [compute_score(clip, desired_fs, is_personalized_eval) for clip in tqdm(clips)]

    df = pd.DataFrame(rows)
    if args.csv_path:
        df.to_csv(args.csv_path)
    elif df.empty:
        # DataFrame.describe() raises ValueError on a frame without columns.
        print("No clips were scored; nothing to summarise.")
    else:
        print(df.describe())
131
+
132
+
133
if __name__ == "__main__":
    # Command-line entry point: declare the three options, parse, then hand
    # the resulting namespace to main().
    cli = argparse.ArgumentParser()
    option_specs = (
        (('-t', "--testset_dir"),
         {'default': '.', 'help': '包含要评估的.wav格式音频剪辑的目录的路径'}),
        (('-o', "--csv_path"),
         {'default': None, 'help': '保存结果的csv文件'}),
        (('-p', "--personalized_MOS"),
         {'action': 'store_true', 'help': '标志表明是需要个性化的MOS分数还是常规的'}),
    )
    for flags, options in option_specs:
        cli.add_argument(*flags, **options)

    main(cli.parse_args())
@@ -0,0 +1,137 @@
1
+ '''
2
+ Author: 凌逆战 | Never
3
+ Date: 2025-08-06 10:00:00
4
+ Description:
5
+ 要计算个性化 MOS 分数(干扰说话者受到惩罚),请提供“-p”参数,例如:python dnsmos.py -t ./SampleClips -o sample.csv -p
6
+ 要计算常规 MOS 分数,请省略“-p”参数。例如:python dnsmos.py -t ./SampleClips -o sample.csv
7
+ '''
8
+ import argparse
9
+ import concurrent.futures
10
+ import glob
11
+ import os
12
+ import librosa
13
+ import numpy as np
14
+ import onnxruntime as ort
15
+ import pandas as pd
16
+ import soundfile as sf
17
+ from tqdm import tqdm
18
+ from neverlib.utils import get_path_list
19
+
20
+
21
class ComputeScore:
    """Run the SIG/BAK/OVRL and P.808 ONNX models over an audio file.

    Instances are callable: ``scorer(fpath, sampling_rate, is_personalized_MOS)``
    returns the per-file mean of the segment-level scores.
    """

    # Calibration polynomials applied to the raw model outputs, stored in
    # (sig, bak, ovr) order. Built once here instead of on every segment.
    _PERSONALIZED_POLYS = (
        np.poly1d([-0.01019296, 0.02751166, 1.19576786, -0.24348726]),
        np.poly1d([-0.04976499, 0.44276479, -0.1644611, 0.96883132]),
        np.poly1d([-0.00533021, 0.005101, 1.18058466, -0.11236046]),
    )
    _REGULAR_POLYS = (
        np.poly1d([-0.08397278, 1.22083953, 0.0052439]),
        np.poly1d([-0.13166888, 1.60915514, -0.39604546]),
        np.poly1d([-0.06766283, 1.11546468, 0.04602535]),
    )

    def __init__(self, primary_model_path, p808_model_path, sampling_rate, input_length) -> None:
        """Load both ONNX models.

        Args:
            primary_model_path: path of the ONNX model producing the raw
                SIG/BAK/OVRL scores.
            p808_model_path: path of the ONNX model producing the P.808 score.
            sampling_rate: stored on the instance as ``self.sampling_rate``.
            input_length: length in seconds of each scored segment.
        """
        self.sampling_rate = sampling_rate
        self.input_length = input_length
        self.onnx_sess = ort.InferenceSession(primary_model_path)
        self.p808_onnx_sess = ort.InferenceSession(p808_model_path)

    def audio_melspec(self, audio, n_mels=120, frame_size=320, hop_length=160, sr=16000, to_db=True):
        """Compute the mel-spectrogram features fed to the P.808 model.

        Args:
            audio: waveform samples.
            n_mels: number of mel bands.
            frame_size: the FFT size used is ``frame_size + 1``.
            hop_length: hop between frames, in samples.
            sr: sample rate of ``audio``.
            to_db: when true, convert power to dB relative to the maximum and
                rescale with ``(dB + 40) / 40``.

        Returns:
            The transposed mel spectrogram.
        """
        mel_spec = librosa.feature.melspectrogram(
            y=audio, sr=sr, n_fft=frame_size + 1, hop_length=hop_length, n_mels=n_mels)
        if to_db:
            mel_spec = (librosa.power_to_db(mel_spec, ref=np.max) + 40) / 40
        return mel_spec.T

    def get_polyfit_val(self, sig, bak, ovr, is_personalized_MOS):
        """Apply the calibration polynomials to raw SIG/BAK/OVRL scores.

        Args:
            sig: raw signal score.
            bak: raw background score.
            ovr: raw overall score.
            is_personalized_MOS: selects the personalized (cubic) polynomials
                when true, the regular (quadratic) ones otherwise.

        Returns:
            ``(sig_poly, bak_poly, ovr_poly)``.
        """
        polys = self._PERSONALIZED_POLYS if is_personalized_MOS else self._REGULAR_POLYS
        p_sig, p_bak, p_ovr = polys
        return p_sig(sig), p_bak(bak), p_ovr(ovr)

    @staticmethod
    def _loop_pad(audio, min_len):
        """Repeat ``audio`` (doubling it each pass) until it has ``min_len`` samples.

        Raises:
            ValueError: if ``audio`` is empty; doubling an empty signal never
                reaches ``min_len`` and would otherwise loop forever.
        """
        if len(audio) == 0:
            raise ValueError("cannot score an empty audio signal")
        while len(audio) < min_len:
            audio = np.append(audio, audio)
        return audio

    def __call__(self, fpath, sampling_rate, is_personalized_MOS):
        """Score one audio file.

        Args:
            fpath: path of the audio file, read with ``soundfile``.
            sampling_rate: rate the audio is resampled to before scoring.
            is_personalized_MOS: forwarded to ``get_polyfit_val``.

        Returns:
            ``(OVRL_raw, SIG_raw, BAK_raw, OVRL, SIG, BAK, P808_MOS)``, each
            the mean over all scored segments.

        Raises:
            ValueError: if the file contains no samples.
        """
        aud, input_fs = sf.read(fpath)
        fs = sampling_rate
        if input_fs != fs:
            # Sample rates must be passed by keyword: librosa >= 0.10 rejects
            # them as positional arguments.
            audio = librosa.resample(aud, orig_sr=input_fs, target_sr=fs)
        else:
            audio = aud

        # Clips shorter than one analysis window are looped until long enough.
        len_samples = int(self.input_length * fs)
        audio = self._loop_pad(audio, len_samples)

        # Windows of ``input_length`` seconds, advanced one second at a time.
        num_hops = int(np.floor(len(audio) / fs) - self.input_length) + 1
        hop_len_samples = fs
        predicted_mos_sig_seg_raw = []
        predicted_mos_bak_seg_raw = []
        predicted_mos_ovr_seg_raw = []
        predicted_mos_sig_seg = []
        predicted_mos_bak_seg = []
        predicted_mos_ovr_seg = []
        predicted_p808_mos = []

        for idx in range(num_hops):
            seg_start = int(idx * hop_len_samples)
            seg_end = int((idx + self.input_length) * hop_len_samples)
            audio_seg = audio[seg_start:seg_end]
            if len(audio_seg) < len_samples:
                continue

            input_features = np.array(audio_seg).astype('float32')[np.newaxis, :]
            # The P.808 model is fed mel features of the segment minus its
            # last 160 samples.
            p808_input_features = np.array(
                self.audio_melspec(audio=audio_seg[:-160])).astype('float32')[np.newaxis, :, :]
            oi = {'input_1': input_features}
            p808_oi = {'input_1': p808_input_features}
            p808_mos = self.p808_onnx_sess.run(None, p808_oi)[0][0][0]
            mos_sig_raw, mos_bak_raw, mos_ovr_raw = self.onnx_sess.run(None, oi)[0][0]
            mos_sig, mos_bak, mos_ovr = self.get_polyfit_val(
                mos_sig_raw, mos_bak_raw, mos_ovr_raw, is_personalized_MOS)
            predicted_mos_sig_seg_raw.append(mos_sig_raw)
            predicted_mos_bak_seg_raw.append(mos_bak_raw)
            predicted_mos_ovr_seg_raw.append(mos_ovr_raw)
            predicted_mos_sig_seg.append(mos_sig)
            predicted_mos_bak_seg.append(mos_bak)
            predicted_mos_ovr_seg.append(mos_ovr)
            predicted_p808_mos.append(p808_mos)

        OVRL_raw = np.mean(predicted_mos_ovr_seg_raw)
        SIG_raw = np.mean(predicted_mos_sig_seg_raw)
        BAK_raw = np.mean(predicted_mos_bak_seg_raw)
        OVRL = np.mean(predicted_mos_ovr_seg)
        SIG = np.mean(predicted_mos_sig_seg)
        BAK = np.mean(predicted_mos_bak_seg)
        P808_MOS = np.mean(predicted_p808_mos)
        return OVRL_raw, SIG_raw, BAK_raw, OVRL, SIG, BAK, P808_MOS
100
+
101
+
102
def main(args):
    """Score every ``.wav`` clip under ``args.testset_dir`` and report the results.

    Args:
        args: parsed command-line namespace providing ``testset_dir``,
            ``csv_path`` and ``personalized_MOS``.

    The ONNX model files are looked up relative to the current working
    directory. One row of scores is collected per clip; the rows are written
    to ``args.csv_path`` when it is set, otherwise summary statistics are
    printed.
    """
    SAMPLING_RATE = 16000
    INPUT_LENGTH = 9.01

    # These two paths were referenced without ever being defined, so the
    # ComputeScore(...) call below raised NameError.
    p808_model_path = os.path.join('DNSMOS', 'model_v8.onnx')
    if args.personalized_MOS:
        primary_model_path = os.path.join('pDNSMOS', 'sig_bak_ovr.onnx')
    else:
        primary_model_path = os.path.join('DNSMOS', 'sig_bak_ovr.onnx')

    compute_score = ComputeScore(primary_model_path, p808_model_path, SAMPLING_RATE, INPUT_LENGTH)

    is_personalized_eval = args.personalized_MOS
    desired_fs = SAMPLING_RATE

    clips = get_path_list(args.testset_dir, 'wav')

    rows = [
        compute_score(clip, desired_fs, is_personalized_eval)
        for clip in tqdm(clips)
    ]

    df = pd.DataFrame(rows)
    if args.csv_path:
        df.to_csv(args.csv_path)
    else:
        print(df.describe())
125
+
126
+
127
if __name__ == "__main__":
    # Script entry point: parse the command line and run the evaluation.
    parser = argparse.ArgumentParser()

    # Directory holding the .wav clips to evaluate.
    parser.add_argument('-t', "--testset_dir",
                        default='.',
                        help='包含要评估的.wav格式音频剪辑的目录的路径')

    # CSV file to save the results to; statistics are printed when omitted.
    parser.add_argument('-o', "--csv_path",
                        default=None,
                        help='保存结果的csv文件')

    # Switch between personalized and regular MOS scoring.
    parser.add_argument('-p', "--personalized_MOS",
                        action='store_true',
                        help='标志表明是需要个性化的MOS分数还是常规的')

    args = parser.parse_args()
    main(args)