neverlib 0.2.8__py3-none-any.whl → 0.2.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- neverlib/QA/ImpactNoiseRejection.py +119 -0
- neverlib/QA/impact_noise_rejection.png +0 -0
- neverlib/QA/out.pcm +0 -0
- neverlib/QA/out.wav +0 -0
- neverlib/audio_aug/README.md +3 -0
- neverlib/audio_aug/__init__.py +0 -54
- neverlib/data_analyze/__init__.py +0 -44
- neverlib/filter/__init__.py +0 -23
- neverlib/filter/auto_eq/__init__.py +0 -36
- neverlib/filter/core.py +8 -5
- neverlib/metrics/README.md +35 -0
- neverlib/metrics/__init__.py +0 -36
- neverlib/metrics/pesq_c/PESQ +0 -0
- neverlib/signal_gen/babble_noise_generate.py +113 -0
- neverlib/tests/__init__.py +0 -17
- neverlib/utils/README.md +29 -0
- neverlib/utils/__init__.py +6 -50
- neverlib/utils/audio_split.py +20 -20
- neverlib/utils/checkGPU.py +52 -79
- neverlib/utils/floder.py +115 -0
- neverlib/utils/pcm.py +42 -0
- neverlib/utils/utils.py +3 -92
- neverlib/vad/__init__.py +0 -38
- {neverlib-0.2.8.dist-info → neverlib-0.2.9.dist-info}/METADATA +15 -1
- neverlib-0.2.9.dist-info/RECORD +119 -0
- neverlib/.claude/settings.local.json +0 -9
- neverlib/.history/Docs/audio_aug/del_20250827162530.py +0 -0
- neverlib/.history/Docs/audio_aug/del_20250827162540.py +0 -2
- neverlib/.history/Docs/audio_aug/del_20250827162541.py +0 -7
- neverlib/.history/Docs/audio_aug/del_20250827162606.py +0 -7
- neverlib/.history/Docs/audio_aug/del_20250827162637.py +0 -8
- neverlib/.history/Docs/audio_aug/del_20250827162645.py +0 -8
- neverlib/.history/Docs/audio_aug/del_20250827162723.py +0 -9
- neverlib/.history/Docs/audio_aug/del_20250827162739.py +0 -9
- neverlib/.history/Docs/audio_aug/test_snr_20250806011311.py +0 -0
- neverlib/.history/Docs/audio_aug/test_snr_20250806011331.py +0 -75
- neverlib/.history/Docs/audio_aug/test_snr_20250806011342.py +0 -57
- neverlib/.history/Docs/audio_aug/test_snr_20250806011352.py +0 -57
- neverlib/.history/Docs/audio_aug/test_snr_20250806011403.py +0 -57
- neverlib/.history/Docs/audio_aug/test_snr_20250806011413.py +0 -57
- neverlib/.history/Docs/audio_aug/test_snr_20250806011435.py +0 -55
- neverlib/.history/Docs/audio_aug/test_snr_20250827161751.py +0 -55
- neverlib/.history/Docs/audio_aug/test_snr_20250827161754.py +0 -55
- neverlib/.history/Docs/audio_aug/test_snr_20250827161833.py +0 -54
- neverlib/.history/Docs/audio_aug/test_snr_20250827162017.py +0 -56
- neverlib/.history/Docs/audio_aug/test_snr_20250827162021.py +0 -57
- neverlib/.history/Docs/audio_aug/test_snr_20250827162028.py +0 -57
- neverlib/.history/Docs/audio_aug/test_snr_20250827162033.py +0 -55
- neverlib/.history/Docs/audio_aug_test/del_20250827162738.py +0 -9
- neverlib/.history/Docs/audio_aug_test/del_20250827162819.py +0 -9
- neverlib/.history/Docs/audio_aug_test/del_20250827162830.py +0 -9
- neverlib/.history/Docs/audio_aug_test/del_20250827162846.py +0 -9
- neverlib/.history/Docs/audio_aug_test/del_20250827162851.py +0 -9
- neverlib/.history/Docs/audio_aug_test/del_20250827162903.py +0 -10
- neverlib/.history/Docs/audio_aug_test/del_20250827162921.py +0 -10
- neverlib/.history/Docs/audio_aug_test/del_20250827162926.py +0 -10
- neverlib/.history/Docs/audio_aug_test/del_20250827163030.py +0 -10
- neverlib/.history/Docs/audio_aug_test/del_20250827163032.py +0 -10
- neverlib/.history/Docs/vad/1_20250810032405.py +0 -0
- neverlib/.history/Docs/vad/1_20250810032417.py +0 -39
- neverlib/.history/QA/html2markdown_20250822234112.md +0 -0
- neverlib/.history/QA/html2markdown_20250822234140.py +0 -9
- neverlib/.history/QA/html2markdown_20250822234141.md +0 -9
- neverlib/.history/QA/html2markdown_20250822234159.py +0 -12
- neverlib/.history/QA/html2markdown_20250822234200.py +0 -17
- neverlib/.history/QA/html2markdown_20250822234236.py +0 -17
- neverlib/.history/QA/html2markdown_20250822234340.py +0 -14
- neverlib/.history/QA/html2markdown_20250822234522.py +0 -18
- neverlib/.history/QA/html2markdown_20250822234601.py +0 -20
- neverlib/.history/QA/html2markdown_20250822234615.py +0 -22
- neverlib/.history/QA/html2markdown_20250822234715.py +0 -28
- neverlib/.history/QA/html2markdown_20250822234720.py +0 -27
- neverlib/.history/QA/html2markdown_20250822234903.py +0 -27
- neverlib/.history/__init___20250805234212.py +0 -41
- neverlib/.history/__init___20250904102635.py +0 -39
- neverlib/.history/__init___20250904102836.py +0 -34
- neverlib/.history/__init___20250904102838.py +0 -39
- neverlib/.history/__init___20250904102851.py +0 -33
- neverlib/.history/audio_aug/audio_aug_20250806010451.py +0 -125
- neverlib/.history/audio_aug/audio_aug_20250806010750.py +0 -138
- neverlib/.history/audio_aug/audio_aug_20250806010759.py +0 -140
- neverlib/.history/audio_aug/audio_aug_20250806010803.py +0 -140
- neverlib/.history/audio_aug/audio_aug_20250806010809.py +0 -140
- neverlib/.history/audio_aug/audio_aug_20250806011108.py +0 -140
- neverlib/.history/audio_aug/audio_aug_20250826155913.py +0 -158
- neverlib/.history/audio_aug/audio_aug_20250826164159.py +0 -159
- neverlib/.history/audio_aug/audio_aug_20250826164217.py +0 -160
- neverlib/.history/audio_aug/audio_aug_20250826164408.py +0 -161
- neverlib/.history/audio_aug/audio_aug_20250826164423.py +0 -161
- neverlib/.history/audio_aug/audio_aug_20250826164529.py +0 -161
- neverlib/.history/audio_aug/audio_aug_20250826164824.py +0 -161
- neverlib/.history/audio_aug/audio_aug_20250826164932.py +0 -162
- neverlib/.history/audio_aug/audio_aug_20250826164947.py +0 -162
- neverlib/.history/audio_aug/audio_aug_20250826165403.py +0 -162
- neverlib/.history/audio_aug/audio_aug_20250826165421.py +0 -162
- neverlib/.history/audio_aug/audio_aug_20250826165509.py +0 -163
- neverlib/.history/audio_aug/audio_aug_20250826165702.py +0 -163
- neverlib/.history/audio_aug/audio_aug_20250826165732.py +0 -165
- neverlib/.history/audio_aug/audio_aug_20250826170041.py +0 -163
- neverlib/.history/audio_aug/audio_aug_20250826170105.py +0 -164
- neverlib/.history/audio_aug/audio_aug_20250826170154.py +0 -164
- neverlib/.history/audio_aug/audio_aug_20250826170220.py +0 -165
- neverlib/.history/audio_aug/audio_aug_20250826170221.py +0 -165
- neverlib/.history/audio_aug/audio_aug_20250826170228.py +0 -165
- neverlib/.history/audio_aug/audio_aug_20250826170231.py +0 -165
- neverlib/.history/audio_aug/audio_aug_20250826212001.py +0 -165
- neverlib/.history/audio_aug/audio_aug_20250826220038.py +0 -165
- neverlib/.history/audio_aug/audio_aug_20250826220133.py +0 -165
- neverlib/.history/audio_aug/audio_aug_20250826220148.py +0 -165
- neverlib/.history/audio_aug/audio_aug_20250826220154.py +0 -165
- neverlib/.history/audio_aug/audio_aug_20250826220156.py +0 -165
- neverlib/.history/audio_aug/audio_aug_20250826220314.py +0 -165
- neverlib/.history/audio_aug/audio_aug_20250826220343.py +0 -184
- neverlib/.history/audio_aug/audio_aug_20250826220345.py +0 -184
- neverlib/.history/audio_aug/audio_aug_20250826220349.py +0 -184
- neverlib/.history/audio_aug/audio_aug_20250826220429.py +0 -184
- neverlib/.history/audio_aug/audio_aug_20250826220447.py +0 -184
- neverlib/.history/audio_aug/audio_aug_20250826220601.py +0 -186
- neverlib/.history/audio_aug/audio_aug_20250826220638.py +0 -186
- neverlib/.history/audio_aug/audio_aug_20250826220641.py +0 -186
- neverlib/.history/audio_aug/audio_aug_20250826220647.py +0 -186
- neverlib/.history/audio_aug/audio_aug_20250826220653.py +0 -186
- neverlib/.history/audio_aug/audio_aug_20250826220655.py +0 -186
- neverlib/.history/audio_aug/audio_aug_20250826220731.py +0 -185
- neverlib/.history/audio_aug/audio_aug_20250826220739.py +0 -185
- neverlib/.history/audio_aug/audio_aug_20250826220747.py +0 -185
- neverlib/.history/audio_aug/audio_aug_20250826220801.py +0 -186
- neverlib/.history/audio_aug/audio_aug_20250826220822.py +0 -186
- neverlib/.history/audio_aug/audio_aug_20250826220901.py +0 -186
- neverlib/.history/audio_aug/audio_aug_20250826221107.py +0 -187
- neverlib/.history/audio_aug/audio_aug_20250826221310.py +0 -188
- neverlib/.history/audio_aug/audio_aug_20250826221353.py +0 -191
- neverlib/.history/audio_aug/audio_aug_20250826221821.py +0 -191
- neverlib/.history/audio_aug/audio_aug_20250826221838.py +0 -191
- neverlib/.history/audio_aug/audio_aug_20250826221906.py +0 -191
- neverlib/.history/audio_aug/audio_aug_20250826221930.py +0 -191
- neverlib/.history/audio_aug/audio_aug_20250826221939.py +0 -191
- neverlib/.history/audio_aug/audio_aug_20250826221955.py +0 -191
- neverlib/.history/audio_aug/audio_aug_20250826222008.py +0 -197
- neverlib/.history/audio_aug/audio_aug_20250826222017.py +0 -200
- neverlib/.history/audio_aug/audio_aug_20250826222046.py +0 -203
- neverlib/.history/audio_aug/audio_aug_20250826222105.py +0 -203
- neverlib/.history/audio_aug/audio_aug_20250826222206.py +0 -203
- neverlib/.history/audio_aug/audio_aug_20250826222302.py +0 -203
- neverlib/.history/audio_aug/audio_aug_20250826222336.py +0 -203
- neverlib/.history/audio_aug/audio_aug_20250826222455.py +0 -204
- neverlib/.history/audio_aug/audio_aug_20250826222526.py +0 -204
- neverlib/.history/audio_aug/audio_aug_20250826222541.py +0 -204
- neverlib/.history/audio_aug/audio_aug_20250826222624.py +0 -202
- neverlib/.history/audio_aug/audio_aug_20250826222714.py +0 -205
- neverlib/.history/audio_aug/audio_aug_20250826222820.py +0 -205
- neverlib/.history/audio_aug/audio_aug_20250826222827.py +0 -205
- neverlib/.history/audio_aug/audio_aug_20250826222927.py +0 -232
- neverlib/.history/audio_aug/audio_aug_20250826223009.py +0 -232
- neverlib/.history/audio_aug/audio_aug_20250826223054.py +0 -232
- neverlib/.history/audio_aug/audio_aug_20250826223225.py +0 -233
- neverlib/.history/audio_aug/audio_aug_20250826223344.py +0 -236
- neverlib/.history/audio_aug/audio_aug_20250826223356.py +0 -236
- neverlib/.history/audio_aug/audio_aug_20250826223955.py +0 -242
- neverlib/.history/audio_aug/audio_aug_20250826224210.py +0 -240
- neverlib/.history/audio_aug/audio_aug_20250826224250.py +0 -242
- neverlib/.history/audio_aug/audio_aug_20250826224323.py +0 -280
- neverlib/.history/audio_aug/audio_aug_20250826224452.py +0 -263
- neverlib/.history/audio_aug/audio_aug_20250826224455.py +0 -263
- neverlib/.history/audio_aug/audio_aug_20250826224502.py +0 -263
- neverlib/.history/audio_aug/audio_aug_20250826224528.py +0 -263
- neverlib/.history/audio_aug/audio_aug_20250826224658.py +0 -263
- neverlib/.history/audio_aug/audio_aug_20250826224833.py +0 -264
- neverlib/.history/audio_aug/audio_aug_20250826225013.py +0 -269
- neverlib/.history/audio_aug/audio_aug_20250826225050.py +0 -269
- neverlib/.history/audio_aug/audio_aug_20250826225241.py +0 -268
- neverlib/.history/audio_aug/audio_aug_20250826225315.py +0 -266
- neverlib/.history/audio_aug/audio_aug_20250826225404.py +0 -266
- neverlib/.history/audio_aug/audio_aug_20250826225502.py +0 -265
- neverlib/.history/audio_aug/audio_aug_20250826225950.py +0 -267
- neverlib/.history/audio_aug/audio_aug_20250826225959.py +0 -268
- neverlib/.history/audio_aug/audio_aug_20250826230222.py +0 -271
- neverlib/.history/audio_aug/audio_aug_20250826230248.py +0 -270
- neverlib/.history/audio_aug/audio_aug_20250826230638.py +0 -266
- neverlib/.history/audio_aug/audio_aug_20250826230755.py +0 -266
- neverlib/.history/audio_aug/audio_aug_20250826230941.py +0 -265
- neverlib/.history/audio_aug/audio_aug_20250826231054.py +0 -266
- neverlib/.history/audio_aug/audio_aug_20250826231117.py +0 -266
- neverlib/.history/audio_aug/audio_aug_20250826231219.py +0 -266
- neverlib/.history/audio_aug/audio_aug_20250826232330.py +0 -266
- neverlib/.history/audio_aug/audio_aug_20250826232352.py +0 -266
- neverlib/.history/audio_aug/audio_aug_20250827152748.py +0 -268
- neverlib/.history/audio_aug/audio_aug_20250827152806.py +0 -268
- neverlib/.history/audio_aug/audio_aug_20250827152808.py +0 -268
- neverlib/.history/audio_aug/audio_aug_20250827152917.py +0 -283
- neverlib/.history/audio_aug/audio_aug_20250827152929.py +0 -281
- neverlib/.history/audio_aug/audio_aug_20250827153100.py +0 -286
- neverlib/.history/audio_aug/audio_aug_20250827153102.py +0 -286
- neverlib/.history/audio_aug/audio_aug_20250827153301.py +0 -295
- neverlib/.history/audio_aug/audio_aug_20250827153331.py +0 -298
- neverlib/.history/audio_aug/audio_aug_20250827153525.py +0 -303
- neverlib/.history/audio_aug/audio_aug_20250827153533.py +0 -304
- neverlib/.history/audio_aug/audio_aug_20250827153541.py +0 -321
- neverlib/.history/audio_aug/audio_aug_20250827153805.py +0 -322
- neverlib/.history/audio_aug/audio_aug_20250827153832.py +0 -323
- neverlib/.history/audio_aug/audio_aug_20250827153836.py +0 -324
- neverlib/.history/audio_aug/audio_aug_20250827153846.py +0 -324
- neverlib/.history/audio_aug/audio_aug_20250827153859.py +0 -325
- neverlib/.history/audio_aug/audio_aug_20250827154453.py +0 -337
- neverlib/.history/audio_aug/audio_aug_20250827154513.py +0 -355
- neverlib/.history/audio_aug/audio_aug_20250827154538.py +0 -356
- neverlib/.history/audio_aug/audio_aug_20250827154541.py +0 -357
- neverlib/.history/audio_aug/audio_aug_20250827154612.py +0 -357
- neverlib/.history/audio_aug/audio_aug_20250827154657.py +0 -360
- neverlib/.history/audio_aug/audio_aug_20250827154708.py +0 -360
- neverlib/.history/audio_aug/audio_aug_20250827154728.py +0 -366
- neverlib/.history/audio_aug/audio_aug_20250827154755.py +0 -367
- neverlib/.history/audio_aug/audio_aug_20250827154800.py +0 -367
- neverlib/.history/audio_aug/audio_aug_20250827154917.py +0 -368
- neverlib/.history/audio_aug/audio_aug_20250827154928.py +0 -369
- neverlib/.history/audio_aug/audio_aug_20250827154932.py +0 -370
- neverlib/.history/audio_aug/audio_aug_20250827154947.py +0 -372
- neverlib/.history/audio_aug/audio_aug_20250827155015.py +0 -375
- neverlib/.history/audio_aug/audio_aug_20250827155106.py +0 -375
- neverlib/.history/audio_aug/audio_aug_20250827155114.py +0 -393
- neverlib/.history/audio_aug/audio_aug_20250827155207.py +0 -415
- neverlib/.history/audio_aug/audio_aug_20250827155300.py +0 -415
- neverlib/.history/audio_aug/audio_aug_20250827155321.py +0 -471
- neverlib/.history/audio_aug/audio_aug_20250827164703.py +0 -471
- neverlib/.history/audio_aug/audio_aug_20250827164749.py +0 -471
- neverlib/.history/audio_aug/audio_aug_20250827165252.py +0 -472
- neverlib/.history/audio_aug/audio_aug_20250827165334.py +0 -472
- neverlib/.history/audio_aug/audio_aug_20250827165404.py +0 -473
- neverlib/.history/audio_aug/audio_aug_20250827165610.py +0 -473
- neverlib/.history/audio_aug/audio_aug_20250827165805.py +0 -473
- neverlib/.history/audio_aug/audio_aug_20250827170056.py +0 -473
- neverlib/.history/audio_aug/audio_aug_20250827170106.py +0 -472
- neverlib/.history/audio_aug/audio_aug_20250827170143.py +0 -472
- neverlib/.history/audio_aug/audio_aug_20250827170216.py +0 -472
- neverlib/.history/audio_aug/audio_aug_20250827170218.py +0 -472
- neverlib/.history/audio_aug/audio_aug_20250827170314.py +0 -472
- neverlib/.history/audio_aug/audio_aug_20250827171500.py +0 -471
- neverlib/.history/audio_aug/audio_aug_20250827172347.py +0 -471
- neverlib/.history/audio_aug/audio_aug_20250827172558.py +0 -470
- neverlib/.history/audio_aug/audio_aug_20250827172559.py +0 -470
- neverlib/.history/audio_aug/audio_aug_20250827172801.py +0 -470
- neverlib/.history/audio_aug/audio_aug_20250827182522.py +0 -470
- neverlib/.history/audio_aug/audio_aug_20250827182526.py +0 -470
- neverlib/.history/audio_aug/audio_aug_20250827182626.py +0 -470
- neverlib/.history/audio_aug/audio_aug_20250827182715.py +0 -470
- neverlib/.history/audio_aug/audio_aug_20250904185444.py +0 -470
- neverlib/.history/audio_aug/audio_aug_20250904185538.py +0 -445
- neverlib/.history/dataAnalyze/__init___20250805234204.py +0 -87
- neverlib/.history/dataAnalyze/__init___20250806204125.py +0 -14
- neverlib/.history/dataAnalyze/__init___20250806204139.py +0 -14
- neverlib/.history/dataAnalyze/__init___20250806204159.py +0 -14
- neverlib/.history/data_analyze/__init___20250806204158.py +0 -14
- neverlib/.history/data_analyze/__init___20250827163248.py +0 -14
- neverlib/.history/filter/__init___20250820103351.py +0 -70
- neverlib/.history/filter/__init___20250821102348.py +0 -70
- neverlib/.history/filter/__init___20250821102405.py +0 -14
- neverlib/.history/filter/auto_eq/__init___20250819213121.py +0 -36
- neverlib/.history/filter/auto_eq/__init___20250821102241.py +0 -36
- neverlib/.history/filter/auto_eq/__init___20250821102259.py +0 -36
- neverlib/.history/filter/auto_eq/__init___20250821102307.py +0 -36
- neverlib/.history/filter/auto_eq/__init___20250821102310.py +0 -36
- neverlib/.history/filter/auto_eq/__init___20250821102318.py +0 -36
- neverlib/.history/filter/auto_eq/__init___20250821102507.py +0 -36
- neverlib/.history/filter/auto_eq/de_eq_20250820103848.py +0 -361
- neverlib/.history/filter/auto_eq/de_eq_20250821102422.py +0 -360
- neverlib/.history/filter/auto_eq/freq_eq_20250805234206.py +0 -75
- neverlib/.history/filter/auto_eq/freq_eq_20250820140732.py +0 -75
- neverlib/.history/filter/auto_eq/freq_eq_20250820140745.py +0 -75
- neverlib/.history/filter/auto_eq/freq_eq_20250820140816.py +0 -75
- neverlib/.history/filter/auto_eq/freq_eq_20250820140938.py +0 -77
- neverlib/.history/filter/auto_eq/freq_eq_20250820141003.py +0 -77
- neverlib/.history/filter/auto_eq/freq_eq_20250820141006.py +0 -77
- neverlib/.history/filter/auto_eq/freq_eq_20250820141019.py +0 -77
- neverlib/.history/filter/auto_eq/freq_eq_20250820141049.py +0 -77
- neverlib/.history/filter/auto_eq/freq_eq_20250820141211.py +0 -77
- neverlib/.history/filter/auto_eq/freq_eq_20250820141227.py +0 -77
- neverlib/.history/filter/auto_eq/freq_eq_20250820141311.py +0 -78
- neverlib/.history/filter/auto_eq/freq_eq_20250820141340.py +0 -78
- neverlib/.history/filter/auto_eq/freq_eq_20250820141712.py +0 -78
- neverlib/.history/filter/auto_eq/freq_eq_20250820141733.py +0 -78
- neverlib/.history/filter/auto_eq/freq_eq_20250820141755.py +0 -78
- neverlib/.history/filter/auto_eq/freq_eq_20250821102434.py +0 -76
- neverlib/.history/filter/auto_eq/freq_eq_20250821102500.py +0 -76
- neverlib/.history/filter/auto_eq/freq_eq_20250821102502.py +0 -76
- neverlib/.history/filter/auto_eq/freq_eq_20250821143140.py +0 -76
- neverlib/.history/filter/auto_eq/freq_eq_20250821153208.py +0 -76
- neverlib/.history/filter/auto_eq/freq_eq_20250821153214.py +0 -76
- neverlib/.history/filter/auto_eq/ga_eq_basic_20250820102957.py +0 -380
- neverlib/.history/filter/auto_eq/ga_eq_basic_20250820113054.py +0 -380
- neverlib/.history/filter/auto_eq/ga_eq_basic_20250820113150.py +0 -380
- neverlib/.history/filter/auto_eq/ga_eq_basic_20250820113520.py +0 -385
- neverlib/.history/filter/auto_eq/ga_eq_basic_20250820113525.py +0 -385
- neverlib/.history/filter/auto_eq/ga_eq_basic_20250821102212.py +0 -385
- neverlib/.history/filter/auto_eq/ga_eq_basic_20250901110521.py +0 -385
- neverlib/.history/filter/auto_eq/ga_eq_basic_20250901110652.py +0 -385
- neverlib/.history/filter/common_20250806002134.py +0 -37
- neverlib/.history/filter/common_20250821120448.py +0 -49
- neverlib/.history/filter/common_20250821120453.py +0 -49
- neverlib/.history/metrics/dnsmos_20250806001612.py +0 -160
- neverlib/.history/metrics/dnsmos_20250815180659.py +0 -160
- neverlib/.history/metrics/dnsmos_20250815180701.py +0 -158
- neverlib/.history/metrics/dnsmos_20250815181321.py +0 -154
- neverlib/.history/metrics/dnsmos_20250815181327.py +0 -154
- neverlib/.history/metrics/dnsmos_20250815181331.py +0 -154
- neverlib/.history/metrics/dnsmos_20250815181620.py +0 -154
- neverlib/.history/metrics/dnsmos_20250815181631.py +0 -154
- neverlib/.history/metrics/dnsmos_20250815181742.py +0 -154
- neverlib/.history/metrics/dnsmos_20250815181824.py +0 -153
- neverlib/.history/metrics/dnsmos_20250815181834.py +0 -153
- neverlib/.history/metrics/dnsmos_20250815181922.py +0 -153
- neverlib/.history/metrics/dnsmos_20250815182011.py +0 -147
- neverlib/.history/metrics/dnsmos_20250815182036.py +0 -144
- neverlib/.history/metrics/dnsmos_20250815182936.py +0 -143
- neverlib/.history/metrics/dnsmos_20250815182942.py +0 -143
- neverlib/.history/metrics/dnsmos_20250815183032.py +0 -137
- neverlib/.history/metrics/dnsmos_20250815183101.py +0 -144
- neverlib/.history/metrics/dnsmos_20250815183121.py +0 -144
- neverlib/.history/metrics/dnsmos_20250815183123.py +0 -143
- neverlib/.history/metrics/dnsmos_20250815183214.py +0 -143
- neverlib/.history/metrics/dnsmos_20250815183240.py +0 -143
- neverlib/.history/metrics/dnsmos_20250815183248.py +0 -144
- neverlib/.history/metrics/dnsmos_20250815183407.py +0 -142
- neverlib/.history/metrics/dnsmos_20250815183409.py +0 -142
- neverlib/.history/metrics/dnsmos_20250815183431.py +0 -142
- neverlib/.history/metrics/dnsmos_20250815183507.py +0 -140
- neverlib/.history/metrics/dnsmos_20250815183513.py +0 -139
- neverlib/.history/metrics/dnsmos_20250815183618.py +0 -139
- neverlib/.history/metrics/dnsmos_20250815183709.py +0 -140
- neverlib/.history/metrics/dnsmos_20250815183756.py +0 -137
- neverlib/.history/metrics/dnsmos_20250815183815.py +0 -128
- neverlib/.history/metrics/dnsmos_20250815183827.py +0 -129
- neverlib/.history/metrics/dnsmos_20250815183913.py +0 -117
- neverlib/.history/metrics/dnsmos_20250815183914.py +0 -117
- neverlib/.history/metrics/dnsmos_20250815184003.py +0 -118
- neverlib/.history/metrics/dnsmos_20250815184040.py +0 -118
- neverlib/.history/metrics/dnsmos_20250815184049.py +0 -118
- neverlib/.history/metrics/dnsmos_20250815184104.py +0 -117
- neverlib/.history/metrics/dnsmos_20250815184200.py +0 -117
- neverlib/.history/metrics/lpc_lsp_metric_20250816015944.py +0 -128
- neverlib/.history/metrics/lpc_lsp_metric_20250816020142.py +0 -128
- neverlib/.history/metrics/lpc_lsp_metric_20250816020156.py +0 -128
- neverlib/.history/metrics/lpc_lsp_metric_20250816020554.py +0 -130
- neverlib/.history/metrics/lpc_lsp_metric_20250816020600.py +0 -125
- neverlib/.history/metrics/lpc_lsp_metric_20250816020631.py +0 -120
- neverlib/.history/metrics/lpc_lsp_metric_20250816020746.py +0 -118
- neverlib/.history/metrics/lpc_me_20250816013111.py +0 -0
- neverlib/.history/metrics/lpc_me_20250816013129.py +0 -121
- neverlib/.history/metrics/lpc_me_20250816015430.py +0 -103
- neverlib/.history/metrics/lpc_me_20250816015535.py +0 -96
- neverlib/.history/metrics/lpc_me_20250816015542.py +0 -96
- neverlib/.history/metrics/lpc_me_20250816015636.py +0 -97
- neverlib/.history/metrics/lpc_me_20250816015658.py +0 -104
- neverlib/.history/metrics/lpc_me_20250816015703.py +0 -100
- neverlib/.history/metrics/lpc_me_20250816015945.py +0 -128
- neverlib/.history/metrics/snr_20250806010538.py +0 -177
- neverlib/.history/metrics/snr_20250806211634.py +0 -184
- neverlib/.history/metrics/snr_20250827224201.py +0 -182
- neverlib/.history/metrics/snr_20250827234019.py +0 -186
- neverlib/.history/metrics/snr_20250827234028.py +0 -186
- neverlib/.history/metrics/snr_20250827234030.py +0 -186
- neverlib/.history/metrics/spec_20250805234209.py +0 -45
- neverlib/.history/metrics/spec_20250816135530.py +0 -11
- neverlib/.history/metrics/spec_20250816135654.py +0 -16
- neverlib/.history/metrics/spec_20250816135736.py +0 -68
- neverlib/.history/metrics/spec_20250816135904.py +0 -75
- neverlib/.history/metrics/spec_20250816135921.py +0 -82
- neverlib/.history/metrics/spec_20250816140111.py +0 -82
- neverlib/.history/metrics/spec_20250816140543.py +0 -136
- neverlib/.history/metrics/spec_20250816140559.py +0 -172
- neverlib/.history/metrics/spec_20250816140602.py +0 -172
- neverlib/.history/metrics/spec_20250816140608.py +0 -172
- neverlib/.history/metrics/spec_20250816140654.py +0 -148
- neverlib/.history/metrics/spec_20250816140705.py +0 -144
- neverlib/.history/metrics/spec_20250816140755.py +0 -138
- neverlib/.history/metrics/spec_20250816140823.py +0 -170
- neverlib/.history/metrics/spec_20250816140832.py +0 -170
- neverlib/.history/metrics/spec_20250816140833.py +0 -170
- neverlib/.history/metrics/spec_20250816140922.py +0 -147
- neverlib/.history/metrics/spec_20250816141148.py +0 -107
- neverlib/.history/metrics/spec_20250816141219.py +0 -123
- neverlib/.history/metrics/spec_20250816141732.py +0 -178
- neverlib/.history/metrics/spec_20250816141740.py +0 -178
- neverlib/.history/metrics/spec_20250816142030.py +0 -178
- neverlib/.history/metrics/spec_20250816142107.py +0 -135
- neverlib/.history/metrics/spec_20250816142126.py +0 -135
- neverlib/.history/metrics/spec_20250816142410.py +0 -135
- neverlib/.history/metrics/spec_20250816142415.py +0 -136
- neverlib/.history/metrics/spec_metric_20250816135156.py +0 -0
- neverlib/.history/metrics/spec_metric_20250816135226.py +0 -5
- neverlib/.history/metrics/spec_metric_20250816135227.py +0 -10
- neverlib/.history/metrics/spec_metric_20250816135306.py +0 -15
- neverlib/.history/metrics/spec_metric_20250816135442.py +0 -31
- neverlib/.history/metrics/spec_metric_20250816135448.py +0 -31
- neverlib/.history/metrics/spec_metric_20250816135520.py +0 -29
- neverlib/.history/metrics/spec_metric_20250816135537.py +0 -63
- neverlib/.history/metrics/spec_metric_20250816135653.py +0 -65
- neverlib/.history/utils/audio_split_20250805234209.py +0 -268
- neverlib/.history/utils/audio_split_20250904185309.py +0 -268
- neverlib/.history/utils/utils_20250813165516.py +0 -330
- neverlib/.history/utils/utils_20250904181341.py +0 -328
- neverlib/.history/utils/utils_20250904185546.py +0 -352
- neverlib/.history/utils/utils_20250904185548.py +0 -353
- neverlib/.history/utils/utils_20250904185603.py +0 -353
- neverlib/.history/utils/utils_20250904185636.py +0 -353
- neverlib/.history/utils/utils_20250904185658.py +0 -358
- neverlib/.history/utils/utils_20250904190053.py +0 -359
- neverlib/.history/vad/PreProcess_20250805234211.py +0 -63
- neverlib/.history/vad/PreProcess_20250809232455.py +0 -63
- neverlib/.history/vad/PreProcess_20250816020725.py +0 -66
- neverlib/.history/vad/VAD_Silero_20250805234211.py +0 -50
- neverlib/.history/vad/VAD_Silero_20250809232456.py +0 -50
- neverlib/.history/vad/VAD_WebRTC_20250805234211.py +0 -61
- neverlib/.history/vad/VAD_WebRTC_20250809232456.py +0 -61
- neverlib/.history/vad/VAD_funasr_20250805234211.py +0 -54
- neverlib/.history/vad/VAD_funasr_20250809232456.py +0 -54
- neverlib/.history/vad/VAD_vadlib_20250805234211.py +0 -70
- neverlib/.history/vad/VAD_vadlib_20250809232455.py +0 -70
- neverlib/.history/vad/VAD_whisper_20250805234211.py +0 -55
- neverlib/.history/vad/VAD_whisper_20250809232456.py +0 -55
- neverlib/.specstory/.what-is-this.md +0 -69
- neverlib/.specstory/history/2025-08-05_17-06Z-/350/277/231/344/270/200/346/255/245/347/232/204/347/233/256/347/232/204/346/230/257/344/273/200/344/271/210.md +0 -424
- neverlib/.specstory/history/2025-08-22_02-10Z-/345/256/214/345/226/204/345/207/275/346/225/260/347/232/204/345/212/237/350/203/275/345/222/214/345/217/230/351/207/217/345/220/215/345/273/272/350/256/256.md +0 -247
- neverlib/.specstory/history/2025-08-26_11-54Z-oserror-missing-shared-object-file.md +0 -87
- neverlib/.specstory/history/2025-08-27_08-07Z-/345/256/214/345/226/204/346/265/213/350/257/225/346/226/207/346/241/243/347/232/204/350/256/250/350/256/272.md +0 -296
- neverlib/.specstory/history/2025-08-27_08-29Z-delete-python-file-command.md +0 -211
- neverlib/.specstory/history/2025-08-27_09-05Z-/345/234/250jupyter/344/270/255/346/222/255/346/224/276/351/237/263/351/242/221/347/232/204/344/273/243/347/240/201/344/277/256/346/224/271.md +0 -357
- neverlib-0.2.8.dist-info/RECORD +0 -510
- {neverlib-0.2.8.dist-info → neverlib-0.2.9.dist-info}/WHEEL +0 -0
- {neverlib-0.2.8.dist-info → neverlib-0.2.9.dist-info}/licenses/LICENSE +0 -0
- {neverlib-0.2.8.dist-info → neverlib-0.2.9.dist-info}/top_level.txt +0 -0
|
@@ -1,140 +0,0 @@
|
|
|
1
|
-
'''
|
|
2
|
-
Author: 凌逆战 | Never
|
|
3
|
-
Date: 2025-08-06 10:00:00
|
|
4
|
-
Description:
|
|
5
|
-
要计算个性化 MOS 分数(干扰说话者受到惩罚),请提供“-p”参数,例如:python dnsmos.py -t ./SampleClips -o sample.csv -p
|
|
6
|
-
要计算常规 MOS 分数,请省略“-p”参数。例如:python dnsmos.py -t ./SampleClips -o sample.csv
|
|
7
|
-
'''
|
|
8
|
-
import argparse
|
|
9
|
-
import concurrent.futures
|
|
10
|
-
import glob
|
|
11
|
-
import os
|
|
12
|
-
import librosa
|
|
13
|
-
import numpy as np
|
|
14
|
-
import onnxruntime as ort
|
|
15
|
-
import pandas as pd
|
|
16
|
-
import soundfile as sf
|
|
17
|
-
from tqdm import tqdm
|
|
18
|
-
from neverlib.utils import get_path_list
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
class ComputeScore:
    """Score a wav file with two ONNX models (primary SIG/BAK/OVRL and P.808).

    Two inference sessions are created at construction time. Calling the
    instance with a wav path slides a fixed-length window over the audio in
    one-second hops, runs both models on every window and returns the
    clip-level means.

    NOTE(review): the model paths are relative to the current working
    directory, so the process must be started from the folder that contains
    ``DNSMOS/`` and ``pDNSMOS/``.
    NOTE(review): the code appears to assume mono (1-D) audio; multi-channel
    input is not handled explicitly -- confirm with callers.
    """

    def __init__(self, is_personalized_MOS, sampling_rate, input_length) -> None:
        """Load the ONNX sessions.

        Args:
            is_personalized_MOS: When truthy, load the primary model from
                ``./pDNSMOS`` and use the personalized polynomial fit.
            sampling_rate: Sample rate (Hz) the audio is resampled to.
            input_length: Length of one scoring window, in seconds.
        """
        self.sr = sampling_rate
        self.input_length = input_length
        # __call__ needs this flag to choose the polynomial set; the original
        # never stored it, which made every call fail with AttributeError.
        self.is_personalized_MOS = is_personalized_MOS

        p808_model_path = "./DNSMOS/model_v8.onnx"
        if is_personalized_MOS:
            primary_model_path = "./pDNSMOS/sig_bak_ovr.onnx"
        else:
            primary_model_path = "./DNSMOS/sig_bak_ovr.onnx"

        self.onnx_sess = ort.InferenceSession(primary_model_path)
        self.p808_onnx_sess = ort.InferenceSession(p808_model_path)

    @staticmethod
    def _repeat_to_length(wav, min_len):
        """Double ``wav`` (append it to itself) until it has >= ``min_len`` samples.

        Raises:
            ValueError: If ``wav`` is empty; doubling an empty signal would
                never reach ``min_len`` and the loop would spin forever.
        """
        if len(wav) == 0:
            raise ValueError("cannot score an empty audio signal")
        while len(wav) < min_len:
            wav = np.append(wav, wav)
        return wav

    def audio_melspec(self, audio, n_mels=120, frame_size=320, hop_length=160, to_db=True):
        """Return the mel spectrogram of ``audio``, transposed (time axis first).

        Args:
            audio: Audio samples passed to ``librosa.feature.melspectrogram``.
            n_mels: Number of mel bands.
            frame_size: FFT size minus one (``n_fft`` is ``frame_size + 1``).
            hop_length: Hop between frames, in samples.
            to_db: When true, convert power to dB relative to the maximum and
                rescale with ``(dB + 40) / 40``.
        """
        mel_spec = librosa.feature.melspectrogram(
            y=audio, sr=self.sr, n_fft=frame_size + 1,
            hop_length=hop_length, n_mels=n_mels)
        if to_db:
            mel_spec = (librosa.power_to_db(mel_spec, ref=np.max) + 40) / 40
        return mel_spec.T

    def get_polyfit_val(self, sig, bak, ovr, is_personalized_MOS):
        """Map raw model outputs through the fitted polynomials.

        Args:
            sig, bak, ovr: Raw SIG / BAK / OVRL values from the primary model.
            is_personalized_MOS: Selects the cubic (personalized) or the
                quadratic (regular) coefficient set.

        Returns:
            Tuple ``(sig_poly, bak_poly, ovr_poly)``.
        """
        if is_personalized_MOS:
            p_ovr = np.poly1d([-0.00533021, 0.005101, 1.18058466, -0.11236046])
            p_sig = np.poly1d([-0.01019296, 0.02751166, 1.19576786, -0.24348726])
            p_bak = np.poly1d([-0.04976499, 0.44276479, -0.1644611, 0.96883132])
        else:
            p_ovr = np.poly1d([-0.06766283, 1.11546468, 0.04602535])
            p_sig = np.poly1d([-0.08397278, 1.22083953, 0.0052439])
            p_bak = np.poly1d([-0.13166888, 1.60915514, -0.39604546])

        sig_poly, bak_poly, ovr_poly = p_sig(sig), p_bak(bak), p_ovr(ovr)

        return sig_poly, bak_poly, ovr_poly

    def __call__(self, wav_path):
        """Score one wav file.

        Args:
            wav_path: Path of the audio file, read with ``soundfile``.

        Returns:
            Tuple ``(OVRL_raw, SIG_raw, BAK_raw, OVRL, SIG, BAK, P808_MOS)``,
            each the mean over all scored windows.

        Raises:
            ValueError: If the file contains no samples.
        """
        wav, wav_sr = sf.read(wav_path, dtype='float32')
        if wav_sr != self.sr:
            # Sample rates are keyword-only in librosa >= 0.10.
            wav = librosa.resample(wav, orig_sr=wav_sr, target_sr=self.sr)

        len_samples = int(self.input_length * self.sr)
        # Short clips are repeated until at least one full window fits.
        wav = self._repeat_to_length(wav, len_samples)

        num_hops = int(np.floor(len(wav) / self.sr) - self.input_length) + 1
        hop_len_samples = self.sr  # windows advance by one second
        predicted_mos_sig_seg_raw = []
        predicted_mos_bak_seg_raw = []
        predicted_mos_ovr_seg_raw = []
        predicted_mos_sig_seg = []
        predicted_mos_bak_seg = []
        predicted_mos_ovr_seg = []
        predicted_p808_mos = []

        for idx in range(num_hops):
            wav_seg = wav[int(idx * hop_len_samples): int((idx + self.input_length) * hop_len_samples)]
            if len(wav_seg) < len_samples:
                # Incomplete trailing window: skip it.
                continue

            input_features = np.array(wav_seg)[np.newaxis, :]
            # The P.808 model gets the mel spectrogram of the window minus its
            # last 160 samples, with a leading batch axis.
            p808_input_features = np.array(
                self.audio_melspec(audio=wav_seg[:-160])).astype('float32')[np.newaxis, :, :]
            oi = {'input_1': input_features}
            p808_oi = {'input_1': p808_input_features}
            p808_mos = self.p808_onnx_sess.run(None, p808_oi)[0][0][0]
            mos_sig_raw, mos_bak_raw, mos_ovr_raw = self.onnx_sess.run(None, oi)[0][0]
            mos_sig, mos_bak, mos_ovr = self.get_polyfit_val(
                mos_sig_raw, mos_bak_raw, mos_ovr_raw, self.is_personalized_MOS)
            predicted_mos_sig_seg_raw.append(mos_sig_raw)
            predicted_mos_bak_seg_raw.append(mos_bak_raw)
            predicted_mos_ovr_seg_raw.append(mos_ovr_raw)
            predicted_mos_sig_seg.append(mos_sig)
            predicted_mos_bak_seg.append(mos_bak)
            predicted_mos_ovr_seg.append(mos_ovr)
            predicted_p808_mos.append(p808_mos)

        OVRL_raw = np.mean(predicted_mos_ovr_seg_raw)
        SIG_raw = np.mean(predicted_mos_sig_seg_raw)
        BAK_raw = np.mean(predicted_mos_bak_seg_raw)
        OVRL = np.mean(predicted_mos_ovr_seg)
        SIG = np.mean(predicted_mos_sig_seg)
        BAK = np.mean(predicted_mos_bak_seg)
        P808_MOS = np.mean(predicted_p808_mos)
        return OVRL_raw, SIG_raw, BAK_raw, OVRL, SIG, BAK, P808_MOS
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
def main(args):
    """Score the clips found under ``args.testset_dir`` and report the results.

    Args:
        args: Parsed command-line namespace providing ``personalized_MOS``,
            ``testset_dir`` and ``csv_path``.

    The per-clip results are collected into a DataFrame. It is written to
    ``args.csv_path`` when that is set; otherwise ``describe()`` of the
    frame is printed.
    """
    SAMPLING_RATE = 16000
    INPUT_LENGTH = 9.01
    personalized_MOS = args.personalized_MOS

    compute_score = ComputeScore(personalized_MOS, SAMPLING_RATE, INPUT_LENGTH)

    desired_fs = SAMPLING_RATE
    clips = get_path_list(args.testset_dir, 'wav')

    rows = []
    for clip in tqdm(clips):
        # The original passed the undefined name `is_personalized_eval` here,
        # which raised NameError on the first clip; the flag read from `args`
        # is the value that was meant.
        # NOTE(review): the three-argument call shape is kept as-is; confirm
        # it matches ComputeScore.__call__ in this revision of the file.
        data = compute_score(clip, desired_fs, personalized_MOS)
        rows.append(data)

    df = pd.DataFrame(rows)
    if args.csv_path:
        df.to_csv(args.csv_path)
    else:
        print(df.describe())
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
if __name__ == "__main__":
    # Command-line entry point: build the option parser, parse argv and hand
    # the resulting namespace to main().
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '-t', "--testset_dir",
        default='.',
        help='包含要评估的.wav格式音频剪辑的目录的路径',
    )
    cli.add_argument(
        '-o', "--csv_path",
        default=None,
        help='保存结果的csv文件',
    )
    cli.add_argument(
        '-p', "--personalized_MOS",
        action='store_true',
        help='标志表明是需要个性化的MOS分数还是常规的',
    )

    args = cli.parse_args()
    main(args)
|
|
@@ -1,137 +0,0 @@
|
|
|
1
|
-
'''
|
|
2
|
-
Author: 凌逆战 | Never
|
|
3
|
-
Date: 2025-08-06 10:00:00
|
|
4
|
-
Description:
|
|
5
|
-
要计算个性化 MOS 分数(干扰说话者受到惩罚),请提供“-p”参数,例如:python dnsmos.py -t ./SampleClips -o sample.csv -p
|
|
6
|
-
要计算常规 MOS 分数,请省略“-p”参数。例如:python dnsmos.py -t ./SampleClips -o sample.csv
|
|
7
|
-
'''
|
|
8
|
-
import argparse
|
|
9
|
-
import concurrent.futures
|
|
10
|
-
import glob
|
|
11
|
-
import os
|
|
12
|
-
import librosa
|
|
13
|
-
import numpy as np
|
|
14
|
-
import onnxruntime as ort
|
|
15
|
-
import pandas as pd
|
|
16
|
-
import soundfile as sf
|
|
17
|
-
from tqdm import tqdm
|
|
18
|
-
from neverlib.utils import get_path_list
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
class ComputeScore:
    """Score audio clips with the DNSMOS ONNX models.

    Two ONNX inference sessions are created at construction time: a primary
    model that yields raw SIG/BAK/OVRL scores from the waveform, and a P.808
    model that is fed a mel spectrogram of the same segment.
    """

    def __init__(self, is_personalized_MOS, sampling_rate, input_length) -> None:
        """Load both ONNX models and remember the evaluation settings.

        Args:
            is_personalized_MOS: When truthy, the primary model is loaded
                from ``./pDNSMOS`` and the personalized polynomial mapping
                is applied; otherwise ``./DNSMOS`` and the regular mapping.
            sampling_rate: Sample rate in Hz that clips are resampled to.
            input_length: Length of each scored segment, in seconds.
        """
        self.sr = sampling_rate
        self.input_length = input_length
        # __call__ reads this attribute; the original never stored it, so the
        # first scored segment raised AttributeError.
        self.is_personalized_MOS = is_personalized_MOS

        # Model paths are relative to the current working directory.
        p808_model_path = "./DNSMOS/model_v8.onnx"
        if is_personalized_MOS:
            primary_model_path = "./pDNSMOS/sig_bak_ovr.onnx"
        else:
            primary_model_path = "./DNSMOS/sig_bak_ovr.onnx"

        self.onnx_sess = ort.InferenceSession(primary_model_path)
        self.p808_onnx_sess = ort.InferenceSession(p808_model_path)

    def audio_melspec(self, audio, n_mels=120, frame_size=320, hop_length=160, to_db=True):
        """Return the transposed mel spectrogram of ``audio``.

        Args:
            audio: Waveform passed to ``librosa.feature.melspectrogram``.
            n_mels: Number of mel bands.
            frame_size: The FFT size used is ``frame_size + 1``.
            hop_length: Hop between analysis frames, in samples.
            to_db: When true, convert power to dB relative to the maximum
                and rescale with ``(dB + 40) / 40``.

        Returns:
            The (optionally rescaled) mel spectrogram, transposed.
        """
        mel_spec = librosa.feature.melspectrogram(
            y=audio,
            sr=self.sr,
            n_fft=frame_size + 1,
            hop_length=hop_length,
            n_mels=n_mels,
        )
        if to_db:
            mel_spec = (librosa.power_to_db(mel_spec, ref=np.max) + 40) / 40
        return mel_spec.T

    def get_polyfit_val(self, sig, bak, ovr, is_personalized_MOS):
        """Map raw model outputs through the fitted polynomials.

        Args:
            sig: Raw SIG score.
            bak: Raw BAK score.
            ovr: Raw OVRL score.
            is_personalized_MOS: Selects the cubic (personalized) or the
                quadratic (regular) coefficient set.

        Returns:
            Tuple ``(sig_poly, bak_poly, ovr_poly)``.
        """
        if is_personalized_MOS:
            p_ovr = np.poly1d([-0.00533021, 0.005101, 1.18058466, -0.11236046])
            p_sig = np.poly1d([-0.01019296, 0.02751166, 1.19576786, -0.24348726])
            p_bak = np.poly1d([-0.04976499, 0.44276479, -0.1644611, 0.96883132])
        else:
            p_ovr = np.poly1d([-0.06766283, 1.11546468, 0.04602535])
            p_sig = np.poly1d([-0.08397278, 1.22083953, 0.0052439])
            p_bak = np.poly1d([-0.13166888, 1.60915514, -0.39604546])

        return p_sig(sig), p_bak(bak), p_ovr(ovr)

    def __call__(self, wav_path):
        """Score one audio file.

        The file is read as float32, resampled to ``self.sr`` when needed and
        repeated until it is at least one segment long. Segments of
        ``input_length`` seconds are then scored with a hop of one second
        (``self.sr`` samples), and the per-segment scores are averaged.

        Args:
            wav_path: Path of the audio file, as accepted by ``soundfile.read``.

        Returns:
            Tuple ``(OVRL_raw, SIG_raw, BAK_raw, OVRL, SIG, BAK, P808_MOS)``
            holding the mean of each score over all scored segments.
        """
        wav, wav_sr = sf.read(wav_path, dtype='float32')
        if wav_sr != self.sr:
            # Sample rates are keyword-only in current librosa releases; the
            # original positional call raises TypeError there.
            wav = librosa.resample(wav, orig_sr=wav_sr, target_sr=self.sr)

        len_samples = int(self.input_length * self.sr)
        # Clips shorter than one segment are doubled until they are long enough.
        while len(wav) < len_samples:
            wav = np.append(wav, wav)

        num_hops = int(np.floor(len(wav) / self.sr) - self.input_length) + 1
        hop_len_samples = self.sr
        predicted_mos_sig_seg_raw = []
        predicted_mos_bak_seg_raw = []
        predicted_mos_ovr_seg_raw = []
        predicted_mos_sig_seg = []
        predicted_mos_bak_seg = []
        predicted_mos_ovr_seg = []
        predicted_p808_mos = []

        for idx in range(num_hops):
            start = int(idx * hop_len_samples)
            stop = int((idx + self.input_length) * hop_len_samples)
            wav_seg = wav[start:stop]
            if len(wav_seg) < len_samples:
                continue

            input_features = np.array(wav_seg)[np.newaxis, :]
            # The last 160 samples are dropped before the mel spectrogram is
            # taken. NOTE(review): presumably to match the frame count the
            # P.808 model expects; confirm against the model.
            p808_input_features = np.array(
                self.audio_melspec(audio=wav_seg[:-160])
            ).astype('float32')[np.newaxis, :, :]
            oi = {'input_1': input_features}
            p808_oi = {'input_1': p808_input_features}
            p808_mos = self.p808_onnx_sess.run(None, p808_oi)[0][0][0]
            mos_sig_raw, mos_bak_raw, mos_ovr_raw = self.onnx_sess.run(None, oi)[0][0]
            mos_sig, mos_bak, mos_ovr = self.get_polyfit_val(
                mos_sig_raw, mos_bak_raw, mos_ovr_raw, self.is_personalized_MOS)
            predicted_mos_sig_seg_raw.append(mos_sig_raw)
            predicted_mos_bak_seg_raw.append(mos_bak_raw)
            predicted_mos_ovr_seg_raw.append(mos_ovr_raw)
            predicted_mos_sig_seg.append(mos_sig)
            predicted_mos_bak_seg.append(mos_bak)
            predicted_mos_ovr_seg.append(mos_ovr)
            predicted_p808_mos.append(p808_mos)

        OVRL_raw = np.mean(predicted_mos_ovr_seg_raw)
        SIG_raw = np.mean(predicted_mos_sig_seg_raw)
        BAK_raw = np.mean(predicted_mos_bak_seg_raw)
        OVRL = np.mean(predicted_mos_ovr_seg)
        SIG = np.mean(predicted_mos_sig_seg)
        BAK = np.mean(predicted_mos_bak_seg)
        P808_MOS = np.mean(predicted_p808_mos)
        return OVRL_raw, SIG_raw, BAK_raw, OVRL, SIG, BAK, P808_MOS
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
def main(args):
    """Score the clips returned by ``get_path_list(args.testset_dir, 'wav')``.

    The per-clip score tuples are gathered into a DataFrame. When
    ``args.csv_path`` is set the frame is written there as CSV; otherwise
    ``describe()`` of the frame is printed.
    """
    sample_rate = 16000
    segment_seconds = 9.01

    scorer = ComputeScore(args.personalized_MOS, sample_rate, segment_seconds)

    wav_paths = get_path_list(args.testset_dir, 'wav')
    results = pd.DataFrame([scorer(path) for path in tqdm(wav_paths)])

    if not args.csv_path:
        print(results.describe())
        return
    results.to_csv(args.csv_path)
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
if __name__ == "__main__":
    # Command-line entry point: build the option parser, parse argv and hand
    # the resulting namespace to main().
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '-t', "--testset_dir",
        default='.',
        help='包含要评估的.wav格式音频剪辑的目录的路径',
    )
    cli.add_argument(
        '-o', "--csv_path",
        default=None,
        help='保存结果的csv文件',
    )
    cli.add_argument(
        '-p', "--personalized_MOS",
        action='store_true',
        help='标志表明是需要个性化的MOS分数还是常规的',
    )

    args = cli.parse_args()
    main(args)
|
|
@@ -1,128 +0,0 @@
|
|
|
1
|
-
'''
|
|
2
|
-
Author: 凌逆战 | Never
|
|
3
|
-
Date: 2025-08-06 10:00:00
|
|
4
|
-
Description:
|
|
5
|
-
要计算个性化 MOS 分数(干扰说话者受到惩罚),请提供“-p”参数,例如:python dnsmos.py -t ./SampleClips -o sample.csv -p
|
|
6
|
-
要计算常规 MOS 分数,请省略“-p”参数。例如:python dnsmos.py -t ./SampleClips -o sample.csv
|
|
7
|
-
'''
|
|
8
|
-
import argparse
|
|
9
|
-
import concurrent.futures
|
|
10
|
-
import glob
|
|
11
|
-
import os
|
|
12
|
-
import librosa
|
|
13
|
-
import numpy as np
|
|
14
|
-
import onnxruntime as ort
|
|
15
|
-
import pandas as pd
|
|
16
|
-
import soundfile as sf
|
|
17
|
-
from tqdm import tqdm
|
|
18
|
-
from neverlib.utils import get_path_list
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
class ComputeScore:
    """Score audio clips with the DNSMOS ONNX models.

    Two ONNX inference sessions are created at construction time: a primary
    model that yields raw SIG/BAK/OVRL scores from the waveform, and a P.808
    model that is fed a mel spectrogram of the same segment.
    """

    def __init__(self, is_personalized_MOS, sampling_rate, input_length) -> None:
        """Load both ONNX models and remember the evaluation settings.

        Args:
            is_personalized_MOS: When truthy, the primary model is loaded
                from ``./pDNSMOS`` and the personalized polynomial mapping
                is applied; otherwise ``./DNSMOS`` and the regular mapping.
            sampling_rate: Sample rate in Hz that clips are resampled to.
            input_length: Length of each scored segment, in seconds.
        """
        self.sr = sampling_rate
        self.input_length = input_length
        # __call__ reads this attribute; the original never stored it, so the
        # first scored segment raised AttributeError.
        self.is_personalized_MOS = is_personalized_MOS

        # Model paths are relative to the current working directory.
        p808_model_path = "./DNSMOS/model_v8.onnx"
        if is_personalized_MOS:
            primary_model_path = "./pDNSMOS/sig_bak_ovr.onnx"
        else:
            primary_model_path = "./DNSMOS/sig_bak_ovr.onnx"

        self.onnx_sess = ort.InferenceSession(primary_model_path)
        self.p808_onnx_sess = ort.InferenceSession(p808_model_path)

    def audio_melspec(self, audio, n_mels=120, frame_size=320, hop_length=160, to_db=True):
        """Return the transposed mel spectrogram of ``audio``.

        Args:
            audio: Waveform passed to ``librosa.feature.melspectrogram``.
            n_mels: Number of mel bands.
            frame_size: The FFT size used is ``frame_size + 1``.
            hop_length: Hop between analysis frames, in samples.
            to_db: When true, convert power to dB relative to the maximum
                and rescale with ``(dB + 40) / 40``.

        Returns:
            The (optionally rescaled) mel spectrogram, transposed.
        """
        mel_spec = librosa.feature.melspectrogram(
            y=audio,
            sr=self.sr,
            n_fft=frame_size + 1,
            hop_length=hop_length,
            n_mels=n_mels,
        )
        if to_db:
            mel_spec = (librosa.power_to_db(mel_spec, ref=np.max) + 40) / 40
        return mel_spec.T

    def get_polyfit_val(self, sig, bak, ovr, is_personalized_MOS):
        """Map raw model outputs through the fitted polynomials.

        Args:
            sig: Raw SIG score.
            bak: Raw BAK score.
            ovr: Raw OVRL score.
            is_personalized_MOS: Selects the cubic (personalized) or the
                quadratic (regular) coefficient set.

        Returns:
            Tuple ``(sig_poly, bak_poly, ovr_poly)``.
        """
        if is_personalized_MOS:
            p_ovr = np.poly1d([-0.00533021, 0.005101, 1.18058466, -0.11236046])
            p_sig = np.poly1d([-0.01019296, 0.02751166, 1.19576786, -0.24348726])
            p_bak = np.poly1d([-0.04976499, 0.44276479, -0.1644611, 0.96883132])
        else:
            p_ovr = np.poly1d([-0.06766283, 1.11546468, 0.04602535])
            p_sig = np.poly1d([-0.08397278, 1.22083953, 0.0052439])
            p_bak = np.poly1d([-0.13166888, 1.60915514, -0.39604546])

        return p_sig(sig), p_bak(bak), p_ovr(ovr)

    def __call__(self, wav_path):
        """Score one audio file.

        The file is read as float32, resampled to ``self.sr`` when needed and
        repeated until it is at least one segment long. Segments of
        ``input_length`` seconds are then scored with a hop of one second
        (``self.sr`` samples), and the per-segment scores are averaged.

        Args:
            wav_path: Path of the audio file, as accepted by ``soundfile.read``.

        Returns:
            Tuple ``(OVRL_raw, SIG_raw, BAK_raw, OVRL, SIG, BAK, P808_MOS)``
            holding the mean of each score over all scored segments.
        """
        wav, wav_sr = sf.read(wav_path, dtype='float32')
        if wav_sr != self.sr:
            # Sample rates are keyword-only in current librosa releases; the
            # original positional call raises TypeError there.
            wav = librosa.resample(wav, orig_sr=wav_sr, target_sr=self.sr)

        len_samples = int(self.input_length * self.sr)
        # Clips shorter than one segment are doubled until they are long enough.
        while len(wav) < len_samples:
            wav = np.append(wav, wav)

        num_hops = int(np.floor(len(wav) / self.sr) - self.input_length) + 1
        hop_len_samples = self.sr
        predicted_mos_sig_seg_raw = []
        predicted_mos_bak_seg_raw = []
        predicted_mos_ovr_seg_raw = []
        predicted_mos_sig_seg = []
        predicted_mos_bak_seg = []
        predicted_mos_ovr_seg = []
        predicted_p808_mos = []

        for idx in range(num_hops):
            start = int(idx * hop_len_samples)
            stop = int((idx + self.input_length) * hop_len_samples)
            wav_seg = wav[start:stop]
            if len(wav_seg) < len_samples:
                continue

            input_features = np.array(wav_seg)[np.newaxis, :]
            # The last 160 samples are dropped before the mel spectrogram is
            # taken. NOTE(review): presumably to match the frame count the
            # P.808 model expects; confirm against the model.
            p808_input_features = np.array(
                self.audio_melspec(audio=wav_seg[:-160])
            ).astype('float32')[np.newaxis, :, :]
            oi = {'input_1': input_features}
            p808_oi = {'input_1': p808_input_features}
            p808_mos = self.p808_onnx_sess.run(None, p808_oi)[0][0][0]
            mos_sig_raw, mos_bak_raw, mos_ovr_raw = self.onnx_sess.run(None, oi)[0][0]
            mos_sig, mos_bak, mos_ovr = self.get_polyfit_val(
                mos_sig_raw, mos_bak_raw, mos_ovr_raw, self.is_personalized_MOS)
            predicted_mos_sig_seg_raw.append(mos_sig_raw)
            predicted_mos_bak_seg_raw.append(mos_bak_raw)
            predicted_mos_ovr_seg_raw.append(mos_ovr_raw)
            predicted_mos_sig_seg.append(mos_sig)
            predicted_mos_bak_seg.append(mos_bak)
            predicted_mos_ovr_seg.append(mos_ovr)
            predicted_p808_mos.append(p808_mos)

        OVRL_raw = np.mean(predicted_mos_ovr_seg_raw)
        SIG_raw = np.mean(predicted_mos_sig_seg_raw)
        BAK_raw = np.mean(predicted_mos_bak_seg_raw)
        OVRL = np.mean(predicted_mos_ovr_seg)
        SIG = np.mean(predicted_mos_sig_seg)
        BAK = np.mean(predicted_mos_bak_seg)
        P808_MOS = np.mean(predicted_p808_mos)
        return OVRL_raw, SIG_raw, BAK_raw, OVRL, SIG, BAK, P808_MOS
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
def main(testset_dir='.', csv_path=None):
    """Score the clips returned by ``get_path_list(testset_dir, 'wav')``.

    The original body read ``args.testset_dir`` and ``args.csv_path`` although
    no ``args`` exists in this revision (``main()`` is called without
    arguments and nothing parses the command line), so it raised NameError.
    The two values are now keyword parameters with defaults, so the existing
    ``main()`` call keeps working.

    Args:
        testset_dir: Directory handed to ``get_path_list``.
        csv_path: Destination CSV file. When falsy, ``describe()`` of the
            result frame is printed instead.
    """
    SAMPLING_RATE = 16000
    INPUT_LENGTH = 9.01
    is_personalized_MOS = False

    compute_score = ComputeScore(is_personalized_MOS, SAMPLING_RATE, INPUT_LENGTH)

    clips = get_path_list(testset_dir, 'wav')
    rows = []
    for clip in tqdm(clips):
        data = compute_score(clip)
        rows.append(data)

    df = pd.DataFrame(rows)
    if csv_path:
        df.to_csv(csv_path)
    else:
        print(df.describe())
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
# Script entry point: run the evaluation only when executed directly.
if __name__ == "__main__":
    main()
|
|
@@ -1,129 +0,0 @@
|
|
|
1
|
-
'''
|
|
2
|
-
Author: 凌逆战 | Never
|
|
3
|
-
Date: 2025-08-06 10:00:00
|
|
4
|
-
Description:
|
|
5
|
-
要计算个性化 MOS 分数(干扰说话者受到惩罚),请提供“-p”参数,例如:python dnsmos.py -t ./SampleClips -o sample.csv -p
|
|
6
|
-
要计算常规 MOS 分数,请省略“-p”参数。例如:python dnsmos.py -t ./SampleClips -o sample.csv
|
|
7
|
-
'''
|
|
8
|
-
import argparse
|
|
9
|
-
import concurrent.futures
|
|
10
|
-
import glob
|
|
11
|
-
import os
|
|
12
|
-
import librosa
|
|
13
|
-
import numpy as np
|
|
14
|
-
import onnxruntime as ort
|
|
15
|
-
import pandas as pd
|
|
16
|
-
import soundfile as sf
|
|
17
|
-
from tqdm import tqdm
|
|
18
|
-
from neverlib.utils import get_path_list
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
class ComputeScore:
    """Score audio clips with the DNSMOS ONNX models.

    Two ONNX inference sessions are created at construction time: a primary
    model that yields raw SIG/BAK/OVRL scores from the waveform, and a P.808
    model that is fed a mel spectrogram of the same segment.
    """

    def __init__(self, is_personalized_MOS, sampling_rate, input_length) -> None:
        """Load both ONNX models and remember the evaluation settings.

        Args:
            is_personalized_MOS: When truthy, the primary model is loaded
                from ``./pDNSMOS`` and the personalized polynomial mapping
                is applied; otherwise ``./DNSMOS`` and the regular mapping.
            sampling_rate: Sample rate in Hz that clips are resampled to.
            input_length: Length of each scored segment, in seconds.
        """
        self.sr = sampling_rate
        self.input_length = input_length
        # __call__ reads this attribute; the original never stored it, so the
        # first scored segment raised AttributeError.
        self.is_personalized_MOS = is_personalized_MOS

        # Model paths are relative to the current working directory.
        p808_model_path = "./DNSMOS/model_v8.onnx"
        if is_personalized_MOS:
            primary_model_path = "./pDNSMOS/sig_bak_ovr.onnx"
        else:
            primary_model_path = "./DNSMOS/sig_bak_ovr.onnx"

        self.onnx_sess = ort.InferenceSession(primary_model_path)
        self.p808_onnx_sess = ort.InferenceSession(p808_model_path)

    def audio_melspec(self, audio, n_mels=120, frame_size=320, hop_length=160, to_db=True):
        """Return the transposed mel spectrogram of ``audio``.

        Args:
            audio: Waveform passed to ``librosa.feature.melspectrogram``.
            n_mels: Number of mel bands.
            frame_size: The FFT size used is ``frame_size + 1``.
            hop_length: Hop between analysis frames, in samples.
            to_db: When true, convert power to dB relative to the maximum
                and rescale with ``(dB + 40) / 40``.

        Returns:
            The (optionally rescaled) mel spectrogram, transposed.
        """
        mel_spec = librosa.feature.melspectrogram(
            y=audio,
            sr=self.sr,
            n_fft=frame_size + 1,
            hop_length=hop_length,
            n_mels=n_mels,
        )
        if to_db:
            mel_spec = (librosa.power_to_db(mel_spec, ref=np.max) + 40) / 40
        return mel_spec.T

    def get_polyfit_val(self, sig, bak, ovr, is_personalized_MOS):
        """Map raw model outputs through the fitted polynomials.

        Args:
            sig: Raw SIG score.
            bak: Raw BAK score.
            ovr: Raw OVRL score.
            is_personalized_MOS: Selects the cubic (personalized) or the
                quadratic (regular) coefficient set.

        Returns:
            Tuple ``(sig_poly, bak_poly, ovr_poly)``.
        """
        if is_personalized_MOS:
            p_ovr = np.poly1d([-0.00533021, 0.005101, 1.18058466, -0.11236046])
            p_sig = np.poly1d([-0.01019296, 0.02751166, 1.19576786, -0.24348726])
            p_bak = np.poly1d([-0.04976499, 0.44276479, -0.1644611, 0.96883132])
        else:
            p_ovr = np.poly1d([-0.06766283, 1.11546468, 0.04602535])
            p_sig = np.poly1d([-0.08397278, 1.22083953, 0.0052439])
            p_bak = np.poly1d([-0.13166888, 1.60915514, -0.39604546])

        return p_sig(sig), p_bak(bak), p_ovr(ovr)

    def __call__(self, wav_path):
        """Score one audio file.

        The file is read as float32, resampled to ``self.sr`` when needed and
        repeated until it is at least one segment long. Segments of
        ``input_length`` seconds are then scored with a hop of one second
        (``self.sr`` samples), and the per-segment scores are averaged.

        Args:
            wav_path: Path of the audio file, as accepted by ``soundfile.read``.

        Returns:
            Tuple ``(OVRL_raw, SIG_raw, BAK_raw, OVRL, SIG, BAK, P808_MOS)``
            holding the mean of each score over all scored segments.
        """
        wav, wav_sr = sf.read(wav_path, dtype='float32')
        if wav_sr != self.sr:
            # Sample rates are keyword-only in current librosa releases; the
            # original positional call raises TypeError there.
            wav = librosa.resample(wav, orig_sr=wav_sr, target_sr=self.sr)

        len_samples = int(self.input_length * self.sr)
        # Clips shorter than one segment are doubled until they are long enough.
        while len(wav) < len_samples:
            wav = np.append(wav, wav)

        num_hops = int(np.floor(len(wav) / self.sr) - self.input_length) + 1
        hop_len_samples = self.sr
        predicted_mos_sig_seg_raw = []
        predicted_mos_bak_seg_raw = []
        predicted_mos_ovr_seg_raw = []
        predicted_mos_sig_seg = []
        predicted_mos_bak_seg = []
        predicted_mos_ovr_seg = []
        predicted_p808_mos = []

        for idx in range(num_hops):
            start = int(idx * hop_len_samples)
            stop = int((idx + self.input_length) * hop_len_samples)
            wav_seg = wav[start:stop]
            if len(wav_seg) < len_samples:
                continue

            input_features = np.array(wav_seg)[np.newaxis, :]
            # The last 160 samples are dropped before the mel spectrogram is
            # taken. NOTE(review): presumably to match the frame count the
            # P.808 model expects; confirm against the model.
            p808_input_features = np.array(
                self.audio_melspec(audio=wav_seg[:-160])
            ).astype('float32')[np.newaxis, :, :]
            oi = {'input_1': input_features}
            p808_oi = {'input_1': p808_input_features}
            p808_mos = self.p808_onnx_sess.run(None, p808_oi)[0][0][0]
            mos_sig_raw, mos_bak_raw, mos_ovr_raw = self.onnx_sess.run(None, oi)[0][0]
            mos_sig, mos_bak, mos_ovr = self.get_polyfit_val(
                mos_sig_raw, mos_bak_raw, mos_ovr_raw, self.is_personalized_MOS)
            predicted_mos_sig_seg_raw.append(mos_sig_raw)
            predicted_mos_bak_seg_raw.append(mos_bak_raw)
            predicted_mos_ovr_seg_raw.append(mos_ovr_raw)
            predicted_mos_sig_seg.append(mos_sig)
            predicted_mos_bak_seg.append(mos_bak)
            predicted_mos_ovr_seg.append(mos_ovr)
            predicted_p808_mos.append(p808_mos)

        OVRL_raw = np.mean(predicted_mos_ovr_seg_raw)
        SIG_raw = np.mean(predicted_mos_sig_seg_raw)
        BAK_raw = np.mean(predicted_mos_bak_seg_raw)
        OVRL = np.mean(predicted_mos_ovr_seg)
        SIG = np.mean(predicted_mos_sig_seg)
        BAK = np.mean(predicted_mos_bak_seg)
        P808_MOS = np.mean(predicted_p808_mos)
        return OVRL_raw, SIG_raw, BAK_raw, OVRL, SIG, BAK, P808_MOS
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
def main(testset_dir="../data", csv_path=None):
    """Score the clips returned by ``get_path_list(testset_dir, 'wav')``.

    The original body assigned a local ``testset_dir = "../data"`` but then
    read ``args.testset_dir`` and ``args.csv_path``. No ``args`` exists in
    this revision, so it raised NameError and the local was never used. Both
    values are now keyword parameters; the default directory is the one the
    original hard-coded, so the existing ``main()`` call keeps working.

    Args:
        testset_dir: Directory handed to ``get_path_list``.
        csv_path: Destination CSV file. When falsy, ``describe()`` of the
            result frame is printed instead.
    """
    SAMPLING_RATE = 16000
    INPUT_LENGTH = 9.01
    is_personalized_MOS = False

    compute_score = ComputeScore(is_personalized_MOS, SAMPLING_RATE, INPUT_LENGTH)

    clips = get_path_list(testset_dir, 'wav')
    rows = []
    for clip in tqdm(clips):
        data = compute_score(clip)
        rows.append(data)

    df = pd.DataFrame(rows)
    if csv_path:
        df.to_csv(csv_path)
    else:
        print(df.describe())
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
# Script entry point: run the evaluation only when executed directly.
if __name__ == "__main__":
    main()
|
|
@@ -1,117 +0,0 @@
|
|
|
1
|
-
'''
|
|
2
|
-
Author: 凌逆战 | Never
|
|
3
|
-
Date: 2025-08-06 10:00:00
|
|
4
|
-
Description:
|
|
5
|
-
要计算个性化 MOS 分数(干扰说话者受到惩罚),请提供“-p”参数,例如:python dnsmos.py -t ./SampleClips -o sample.csv -p
|
|
6
|
-
要计算常规 MOS 分数,请省略“-p”参数。例如:python dnsmos.py -t ./SampleClips -o sample.csv
|
|
7
|
-
'''
|
|
8
|
-
import argparse
|
|
9
|
-
import concurrent.futures
|
|
10
|
-
import glob
|
|
11
|
-
import os
|
|
12
|
-
import librosa
|
|
13
|
-
import numpy as np
|
|
14
|
-
import onnxruntime as ort
|
|
15
|
-
import pandas as pd
|
|
16
|
-
import soundfile as sf
|
|
17
|
-
from tqdm import tqdm
|
|
18
|
-
from neverlib.utils import get_path_list
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
class ComputeScore:
    """Score audio clips with the DNSMOS ONNX models.

    Two ONNX inference sessions are created at construction time: a primary
    model that yields raw SIG/BAK/OVRL scores from the waveform, and a P.808
    model that is fed a mel spectrogram of the same segment.
    """

    def __init__(self, is_personalized_MOS, sampling_rate, input_length) -> None:
        """Load both ONNX models and remember the evaluation settings.

        Args:
            is_personalized_MOS: When truthy, the primary model is loaded
                from ``./pDNSMOS`` and the personalized polynomial mapping
                is applied; otherwise ``./DNSMOS`` and the regular mapping.
            sampling_rate: Sample rate in Hz that clips are resampled to.
            input_length: Length of each scored segment, in seconds.
        """
        self.sr = sampling_rate
        self.input_length = input_length
        # __call__ reads this attribute; the original never stored it, so the
        # first scored segment raised AttributeError.
        self.is_personalized_MOS = is_personalized_MOS

        # Model paths are relative to the current working directory.
        p808_model_path = "./DNSMOS/model_v8.onnx"
        if is_personalized_MOS:
            primary_model_path = "./pDNSMOS/sig_bak_ovr.onnx"
        else:
            primary_model_path = "./DNSMOS/sig_bak_ovr.onnx"

        self.onnx_sess = ort.InferenceSession(primary_model_path)
        self.p808_onnx_sess = ort.InferenceSession(p808_model_path)

    def audio_melspec(self, audio, n_mels=120, frame_size=320, hop_length=160, to_db=True):
        """Return the transposed mel spectrogram of ``audio``.

        Args:
            audio: Waveform passed to ``librosa.feature.melspectrogram``.
            n_mels: Number of mel bands.
            frame_size: The FFT size used is ``frame_size + 1``.
            hop_length: Hop between analysis frames, in samples.
            to_db: When true, convert power to dB relative to the maximum
                and rescale with ``(dB + 40) / 40``.

        Returns:
            The (optionally rescaled) mel spectrogram, transposed.
        """
        mel_spec = librosa.feature.melspectrogram(
            y=audio,
            sr=self.sr,
            n_fft=frame_size + 1,
            hop_length=hop_length,
            n_mels=n_mels,
        )
        if to_db:
            mel_spec = (librosa.power_to_db(mel_spec, ref=np.max) + 40) / 40
        return mel_spec.T

    def get_polyfit_val(self, sig, bak, ovr, is_personalized_MOS):
        """Map raw model outputs through the fitted polynomials.

        Args:
            sig: Raw SIG score.
            bak: Raw BAK score.
            ovr: Raw OVRL score.
            is_personalized_MOS: Selects the cubic (personalized) or the
                quadratic (regular) coefficient set.

        Returns:
            Tuple ``(sig_poly, bak_poly, ovr_poly)``.
        """
        if is_personalized_MOS:
            p_ovr = np.poly1d([-0.00533021, 0.005101, 1.18058466, -0.11236046])
            p_sig = np.poly1d([-0.01019296, 0.02751166, 1.19576786, -0.24348726])
            p_bak = np.poly1d([-0.04976499, 0.44276479, -0.1644611, 0.96883132])
        else:
            p_ovr = np.poly1d([-0.06766283, 1.11546468, 0.04602535])
            p_sig = np.poly1d([-0.08397278, 1.22083953, 0.0052439])
            p_bak = np.poly1d([-0.13166888, 1.60915514, -0.39604546])

        return p_sig(sig), p_bak(bak), p_ovr(ovr)

    def __call__(self, wav_path):
        """Score one audio file.

        The file is read as float32, resampled to ``self.sr`` when needed and
        repeated until it is at least one segment long. Segments of
        ``input_length`` seconds are then scored with a hop of one second
        (``self.sr`` samples), and the per-segment scores are averaged.

        Args:
            wav_path: Path of the audio file, as accepted by ``soundfile.read``.

        Returns:
            Tuple ``(OVRL_raw, SIG_raw, BAK_raw, OVRL, SIG, BAK, P808_MOS)``
            holding the mean of each score over all scored segments.
        """
        wav, wav_sr = sf.read(wav_path, dtype='float32')
        if wav_sr != self.sr:
            # Sample rates are keyword-only in current librosa releases; the
            # original positional call raises TypeError there.
            wav = librosa.resample(wav, orig_sr=wav_sr, target_sr=self.sr)

        len_samples = int(self.input_length * self.sr)
        # Clips shorter than one segment are doubled until they are long enough.
        while len(wav) < len_samples:
            wav = np.append(wav, wav)

        num_hops = int(np.floor(len(wav) / self.sr) - self.input_length) + 1
        hop_len_samples = self.sr
        predicted_mos_sig_seg_raw = []
        predicted_mos_bak_seg_raw = []
        predicted_mos_ovr_seg_raw = []
        predicted_mos_sig_seg = []
        predicted_mos_bak_seg = []
        predicted_mos_ovr_seg = []
        predicted_p808_mos = []

        for idx in range(num_hops):
            start = int(idx * hop_len_samples)
            stop = int((idx + self.input_length) * hop_len_samples)
            wav_seg = wav[start:stop]
            if len(wav_seg) < len_samples:
                continue

            input_features = np.array(wav_seg)[np.newaxis, :]
            # The last 160 samples are dropped before the mel spectrogram is
            # taken. NOTE(review): presumably to match the frame count the
            # P.808 model expects; confirm against the model.
            p808_input_features = np.array(
                self.audio_melspec(audio=wav_seg[:-160])
            ).astype('float32')[np.newaxis, :, :]
            oi = {'input_1': input_features}
            p808_oi = {'input_1': p808_input_features}
            p808_mos = self.p808_onnx_sess.run(None, p808_oi)[0][0][0]
            mos_sig_raw, mos_bak_raw, mos_ovr_raw = self.onnx_sess.run(None, oi)[0][0]
            mos_sig, mos_bak, mos_ovr = self.get_polyfit_val(
                mos_sig_raw, mos_bak_raw, mos_ovr_raw, self.is_personalized_MOS)
            predicted_mos_sig_seg_raw.append(mos_sig_raw)
            predicted_mos_bak_seg_raw.append(mos_bak_raw)
            predicted_mos_ovr_seg_raw.append(mos_ovr_raw)
            predicted_mos_sig_seg.append(mos_sig)
            predicted_mos_bak_seg.append(mos_bak)
            predicted_mos_ovr_seg.append(mos_ovr)
            predicted_p808_mos.append(p808_mos)

        OVRL_raw = np.mean(predicted_mos_ovr_seg_raw)
        SIG_raw = np.mean(predicted_mos_sig_seg_raw)
        BAK_raw = np.mean(predicted_mos_bak_seg_raw)
        OVRL = np.mean(predicted_mos_ovr_seg)
        SIG = np.mean(predicted_mos_sig_seg)
        BAK = np.mean(predicted_mos_bak_seg)
        P808_MOS = np.mean(predicted_p808_mos)
        return OVRL_raw, SIG_raw, BAK_raw, OVRL, SIG, BAK, P808_MOS
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
def main():
    """Score the single example file ``../data/vad_example.wav`` and print the result.

    A non-personalized ``ComputeScore`` is built for 16 kHz audio and
    9.01-second segments; the tuple it returns for the file is printed.
    """
    sample_rate = 16000
    segment_seconds = 9.01
    wav_path = "../data/vad_example.wav"

    scorer = ComputeScore(False, sample_rate, segment_seconds)
    print(scorer(wav_path))
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
# Script entry point: run the single-file demo only when executed directly.
if __name__ == "__main__":
    main()
|