neverlib 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- neverlib/.claude/settings.local.json +9 -0
- neverlib/Docs/audio_aug/test_volume.ipynb +416 -0
- neverlib/Docs/audio_aug_test/test_volume.ipynb +289 -0
- neverlib/Docs/filter/biquad.ipynb +129 -0
- neverlib/Docs/filter/filter_family.ipynb +450 -0
- neverlib/Docs/filter/highpass.ipynb +139 -0
- neverlib/Docs/filter/scipy_filter_family.ipynb +110 -0
- neverlib/Docs/vad/VAD_Energy.ipynb +167 -0
- neverlib/Docs/vad/VAD_Silero.ipynb +325 -0
- neverlib/Docs/vad/VAD_WebRTC.ipynb +189 -0
- neverlib/Docs/vad/VAD_funasr.ipynb +192 -0
- neverlib/Docs/vad/VAD_rvADfast.ipynb +162 -0
- neverlib/Docs/vad/VAD_statistics.ipynb +532 -0
- neverlib/Docs/vad/VAD_tenVAD.ipynb +292 -0
- neverlib/Docs/vad/VAD_vadlib.ipynb +168 -0
- neverlib/Docs/vad/VAD_whisper.ipynb +404 -0
- neverlib/QA/gen_init.py +117 -0
- neverlib/QA/get_fun.py +19 -0
- neverlib/__init__.py +21 -4
- neverlib/audio_aug/HarmonicDistortion.py +19 -13
- neverlib/audio_aug/__init__.py +30 -12
- neverlib/audio_aug/audio_aug.py +19 -14
- neverlib/audio_aug/clip_aug.py +15 -18
- neverlib/audio_aug/coder_aug.py +44 -24
- neverlib/audio_aug/coder_aug2.py +54 -37
- neverlib/audio_aug/loss_packet_aug.py +7 -7
- neverlib/audio_aug/quant_aug.py +19 -17
- neverlib/data/000_short_enhance.wav +0 -0
- neverlib/data/3956_speech.wav +0 -0
- neverlib/data/3956_sweep.wav +0 -0
- neverlib/data/vad_example.wav +0 -0
- neverlib/data/white.wav +0 -0
- neverlib/data/white_EQ.wav +0 -0
- neverlib/data/white_matched.wav +0 -0
- neverlib/data_analyze/__init__.py +25 -20
- neverlib/data_analyze/dataset_analyzer.py +109 -114
- neverlib/data_analyze/quality_metrics.py +87 -89
- neverlib/data_analyze/rms_distrubution.py +23 -42
- neverlib/data_analyze/spectral_analysis.py +43 -46
- neverlib/data_analyze/statistics.py +76 -76
- neverlib/data_analyze/temporal_features.py +15 -6
- neverlib/data_analyze/visualization.py +208 -144
- neverlib/filter/__init__.py +17 -20
- neverlib/filter/auto_eq/__init__.py +18 -35
- neverlib/filter/auto_eq/de_eq.py +0 -2
- neverlib/filter/common.py +24 -5
- neverlib/metrics/DNSMOS/bak_ovr.onnx +0 -0
- neverlib/metrics/DNSMOS/model_v8.onnx +0 -0
- neverlib/metrics/DNSMOS/sig.onnx +0 -0
- neverlib/metrics/DNSMOS/sig_bak_ovr.onnx +0 -0
- neverlib/metrics/__init__.py +23 -0
- neverlib/metrics/dnsmos.py +4 -15
- neverlib/metrics/pDNSMOS/sig_bak_ovr.onnx +0 -0
- neverlib/metrics/pesq_c/PESQ +0 -0
- neverlib/metrics/pesq_c/dsp.c +553 -0
- neverlib/metrics/pesq_c/dsp.h +138 -0
- neverlib/metrics/pesq_c/pesq.h +294 -0
- neverlib/metrics/pesq_c/pesqdsp.c +1047 -0
- neverlib/metrics/pesq_c/pesqio.c +392 -0
- neverlib/metrics/pesq_c/pesqmain.c +610 -0
- neverlib/metrics/pesq_c/pesqmod.c +1417 -0
- neverlib/metrics/pesq_c/pesqpar.h +297 -0
- neverlib/metrics/snr.py +5 -1
- neverlib/metrics/spec.py +31 -21
- neverlib/metrics/test_pesq.py +0 -4
- neverlib/tests/test_imports.py +17 -0
- neverlib/utils/__init__.py +26 -15
- neverlib/utils/audio_split.py +5 -1
- neverlib/utils/checkGPU.py +17 -9
- neverlib/utils/lazy_expose.py +29 -0
- neverlib/utils/utils.py +40 -12
- neverlib/vad/__init__.py +33 -25
- neverlib/vad/class_get_speech.py +1 -1
- neverlib/vad/class_vad.py +3 -3
- neverlib/vad/img.png +0 -0
- {neverlib-0.2.5.dist-info → neverlib-0.2.7.dist-info}/METADATA +20 -17
- {neverlib-0.2.5.dist-info → neverlib-0.2.7.dist-info}/RECORD +80 -37
- {neverlib-0.2.5.dist-info → neverlib-0.2.7.dist-info}/WHEEL +0 -0
- {neverlib-0.2.5.dist-info → neverlib-0.2.7.dist-info}/licenses/LICENSE +0 -0
- {neverlib-0.2.5.dist-info → neverlib-0.2.7.dist-info}/top_level.txt +0 -0
neverlib/QA/gen_init.py
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
# 自动生成指定包目录下的 __init__.py(懒加载格式:lazy_loader.attach)
|
|
2
|
+
|
|
3
|
+
import ast
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Dict, List, Set
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _extract_exports_from_module(py_file: Path) -> List[str]:
    """Return the public names exported by a Python source file.

    The file is parsed with ``ast``; it is never imported or executed.

    Resolution order:
      1. If the module has a top-level ``__all__`` assignment (plain or
         annotated), only that assignment is used: its string elements are
         returned in declaration order, with empty names and names starting
         with an underscore removed. An ``__all__`` whose value is not a list
         or tuple literal yields an empty list.
      2. Otherwise the names of all top-level functions, async functions and
         classes that do not start with an underscore are returned, sorted.

    Args:
        py_file: Path of the ``.py`` file to inspect.

    Returns:
        The exported names; an empty list when the file has a syntax error
        or exports nothing.
    """
    try:
        source = py_file.read_text(encoding='utf-8')
    except UnicodeDecodeError:
        # Not valid UTF-8: retry with the default encoding and skip bytes
        # that cannot be decoded instead of aborting the whole scan.
        source = py_file.read_text(errors='ignore')

    try:
        tree = ast.parse(source, filename=str(py_file))
    except SyntaxError:
        return []

    # 1) An explicit __all__ takes priority over everything else.
    for node in tree.body:
        if isinstance(node, ast.Assign):
            targets = node.targets
        elif isinstance(node, ast.AnnAssign) and node.value is not None:
            # `__all__: List[str] = [...]` has a single target.
            targets = [node.target]
        else:
            continue

        if not any(isinstance(t, ast.Name) and t.id == '__all__' for t in targets):
            continue

        values: List[str] = []
        if isinstance(node.value, (ast.List, ast.Tuple)):
            for elt in node.value.elts:
                if isinstance(elt, ast.Constant) and isinstance(elt.value, str):
                    values.append(elt.value)
                elif type(elt).__name__ == 'Str':
                    # Python < 3.8 parses string literals as ast.Str nodes.
                    # The class is matched by name because the ast.Str alias
                    # warns on 3.12+ and no longer exists on 3.14.
                    values.append(elt.s)
        return [name for name in values if name and not name.startswith('_')]

    # 2) No __all__: fall back to top-level function and class names.
    exports: Set[str] = set()
    for node in tree.body:
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
            if not node.name.startswith('_'):
                exports.add(node.name)
    return sorted(exports)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def generate_init_for_directory(package_dir: Path) -> Path:
    """Write a lazy-loading ``__init__.py`` for a package directory.

    Any existing ``__init__.py`` in the directory is overwritten.

    Only the ``.py`` files directly inside ``package_dir`` are scanned; files
    whose name starts with an underscore (which includes ``__init__.py``
    itself) are ignored. The generated file has this shape:

        from lazy_loader import attach

        __getattr__, __dir__, __all__ = attach(
            __name__,
            submodules=["m1", "m2", ...],
            submod_attrs={"m1": ["A", "B"], ...}
        )

    Args:
        package_dir: Directory of the package to generate the file for.

    Returns:
        Path of the written ``__init__.py``.

    Raises:
        ValueError: If ``package_dir`` is not a directory.
    """
    package_dir = package_dir.resolve()
    if not package_dir.is_dir():
        raise ValueError(f"目标不是目录: {package_dir}")

    # The underscore test also filters out __init__.py, so one check suffices.
    candidates = [
        path for path in sorted(package_dir.glob('*.py'))
        if not path.name.startswith('_')
    ]
    module_names: List[str] = [path.stem for path in candidates]

    # Modules without any export are listed as submodules but get no attrs entry.
    module_to_exports: Dict[str, List[str]] = {}
    for path in candidates:
        found = _extract_exports_from_module(path)
        if found:
            module_to_exports[path.stem] = found

    submodule_rows = [f' "{name}",' for name in sorted(module_names)]

    attr_rows: List[str] = []
    for module_name in sorted(module_to_exports):
        quoted = ', '.join(f"'{e}'" for e in sorted(module_to_exports[module_name]))
        attr_rows.append(f' "{module_name}": [{quoted}],')

    lines: List[str] = [
        '# This file is auto-generated. Do NOT edit manually.',
        '# Generated by neverlib.QA.gen_init',
        'from lazy_loader import attach',
        '',
        '__getattr__, __dir__, __all__ = attach(',
        ' __name__,',
        ' submodules=[',
        *submodule_rows,
        ' ],',
        ' submod_attrs={',
        *attr_rows,
        ' }',
        ')',
        '',  # trailing empty entry so the file ends with a newline
    ]

    init_file = package_dir / '__init__.py'
    init_file.write_text('\n'.join(lines), encoding='utf-8')
    return init_file
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def main():
    """Command-line entry point: regenerate ``__init__.py`` for one package.

    The package directory can be passed positionally or with
    ``-p/--package-dir``; the positional value wins when both are given.
    A relative path (including the built-in default) is resolved against the
    current working directory.
    """
    import argparse

    parser = argparse.ArgumentParser(description='为指定包目录自动生成 __init__.py')
    # Optional positional form, so the documented
    # `python -m neverlib.QA.gen_init <dir>` invocation is accepted.
    parser.add_argument('path', nargs='?', default=None,
                        help='包目录路径(位置参数,等价于 -p)')
    parser.add_argument('-p', '--package-dir', dest='package_dir',
                        default='../../neverlib/vad',
                        help='包目录路径,比如 /path/to/pkg 或 ./neverlib/utils')
    args = parser.parse_args()

    chosen = args.path if args.path is not None else args.package_dir
    target_dir = Path(chosen)
    init_path = generate_init_for_directory(target_dir)
    print(f'已生成: {init_path}')
    # Usage:
    #   python -m neverlib.QA.gen_init /data03/never/Desktop/neverlib/neverlib/utils
    #   python -m neverlib.QA.gen_init -p /data03/never/Desktop/neverlib/neverlib/utils


if __name__ == '__main__':
    main()
|
neverlib/QA/get_fun.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# 获取一个python脚本里面所有的函数名
|
|
2
|
+
|
|
3
|
+
import ast
|
|
4
|
+
from typing import List
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def get_function_names(file_path: str) -> List[str]:
    """Collect the names of all functions defined in a Python source file.

    The whole syntax tree is traversed, so nested functions and methods are
    included alongside top-level ``def`` / ``async def`` definitions. Names
    are returned in the order ``ast.walk`` yields the nodes.

    Args:
        file_path: Path of the Python file, read as UTF-8.

    Returns:
        The function names found; empty if the file defines none.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        module_ast = ast.parse(handle.read(), filename=file_path)

    function_nodes = (ast.FunctionDef, ast.AsyncFunctionDef)
    return [item.name for item in ast.walk(module_ast)
            if isinstance(item, function_nodes)]


if __name__ == '__main__':
    print(get_function_names('../utils/checkGPU.py'))
|
neverlib/__init__.py
CHANGED
|
@@ -1,15 +1,31 @@
|
|
|
1
1
|
'''
|
|
2
2
|
Author: 凌逆战 | Never
|
|
3
|
-
Date: 2025-
|
|
3
|
+
Date: 2025-09-07
|
|
4
4
|
Description: neverlib - 音频处理和VAD工具集
|
|
5
|
+
|
|
6
|
+
这是一个提供音频处理、增强、分析和语音活动检测(VAD)功能的Python库。
|
|
7
|
+
该库使用懒加载机制,可以根据需要导入模块,提高启动速度并减少内存占用。
|
|
8
|
+
|
|
9
|
+
主要功能模块:
|
|
10
|
+
- utils: 实用工具函数
|
|
11
|
+
- vad: 语音活动检测
|
|
12
|
+
- audio_aug: 音频增强和数据增广
|
|
13
|
+
- filter: 滤波和音频处理
|
|
14
|
+
- data_analyze: 数据分析工具
|
|
15
|
+
- metrics: 音频质量评估指标
|
|
16
|
+
|
|
17
|
+
注意: 所有功能需要通过具体子模块导入,例如:
|
|
18
|
+
from neverlib.audio_aug import limiter
|
|
19
|
+
from neverlib.vad import EnergyVad_C
|
|
20
|
+
from neverlib.filter import HPFilter
|
|
5
21
|
'''
|
|
6
22
|
try:
|
|
7
23
|
import re
|
|
8
24
|
import pathlib
|
|
9
|
-
|
|
25
|
+
|
|
10
26
|
# 获取pyproject.toml的路径
|
|
11
27
|
_pyproject_path = pathlib.Path(__file__).parent.parent / "pyproject.toml"
|
|
12
|
-
|
|
28
|
+
|
|
13
29
|
# 读取版本号
|
|
14
30
|
if _pyproject_path.exists():
|
|
15
31
|
with open(_pyproject_path, "r", encoding="utf-8") as f:
|
|
@@ -27,5 +43,6 @@ from lazy_loader import attach
|
|
|
27
43
|
|
|
28
44
|
__getattr__, __dir__, __all__ = attach(
|
|
29
45
|
__name__,
|
|
30
|
-
submodules=["
|
|
46
|
+
submodules=["audio_aug", "data_analyze", "filter", "metrics", "utils", "vad", ],
|
|
47
|
+
# 只导出子模块,不直接导出函数
|
|
31
48
|
)
|
|
@@ -3,10 +3,8 @@ Author: 凌逆战 | Never
|
|
|
3
3
|
Date: 2025-07-29 17:49:25
|
|
4
4
|
Description:
|
|
5
5
|
'''
|
|
6
|
-
import pedalboard as pdb
|
|
7
|
-
import librosa
|
|
8
|
-
import soundfile as sf
|
|
9
6
|
import numpy as np
|
|
7
|
+
import soundfile as sf
|
|
10
8
|
|
|
11
9
|
|
|
12
10
|
def apply_harmonic_distortion(wav, drive=1.0, mix=1.0):
|
|
@@ -54,11 +52,15 @@ def apply_pedalboard_distortion(wav, sr, drive_db=15.0):
|
|
|
54
52
|
sr (int): 采样率。
|
|
55
53
|
drive_db (float): 驱动增益, 单位是分贝(dB)。值越大失真越严重。
|
|
56
54
|
"""
|
|
55
|
+
try:
|
|
56
|
+
import pedalboard as pdb
|
|
57
|
+
except ImportError:
|
|
58
|
+
raise ImportError(
|
|
59
|
+
"pedalboard is required for apply_pedalboard_distortion(). "
|
|
60
|
+
"Please install it via `pip install pedalboard`.")
|
|
57
61
|
# 1. 创建一个效果器处理板 (Pedalboard)
|
|
58
62
|
# 这里只放一个 Distortion 效果器
|
|
59
|
-
board = pdb.Pedalboard([
|
|
60
|
-
pdb.Distortion(drive_db=drive_db)
|
|
61
|
-
])
|
|
63
|
+
board = pdb.Pedalboard([pdb.Distortion(drive_db=drive_db)])
|
|
62
64
|
|
|
63
65
|
# 2. 处理音频
|
|
64
66
|
# pedalboard 要求输入是 (channels, samples) 或 (samples,)
|
|
@@ -68,12 +70,16 @@ def apply_pedalboard_distortion(wav, sr, drive_db=15.0):
|
|
|
68
70
|
return distorted_wav
|
|
69
71
|
|
|
70
72
|
|
|
71
|
-
|
|
72
|
-
|
|
73
|
+
if __name__ == "__main__":
|
|
74
|
+
|
|
75
|
+
# --- 使用示例 ---
|
|
76
|
+
y, sr = sf.read('your_audio.wav', sr=None)
|
|
73
77
|
|
|
74
|
-
# 模拟一个中等程度的过载失真
|
|
75
|
-
drive_db_amount = 25.0
|
|
76
|
-
y_pb_distorted = apply_pedalboard_distortion(y,
|
|
78
|
+
# 模拟一个中等程度的过载失真
|
|
79
|
+
drive_db_amount = 25.0
|
|
80
|
+
y_pb_distorted = apply_pedalboard_distortion(y,
|
|
81
|
+
sr,
|
|
82
|
+
drive_db=drive_db_amount)
|
|
77
83
|
|
|
78
|
-
sf.write('augmented_pedalboard_distortion.wav', y_pb_distorted, sr)
|
|
79
|
-
print("使用 Pedalboard 的谐波失真增强完成!")
|
|
84
|
+
sf.write('augmented_pedalboard_distortion.wav', y_pb_distorted, sr)
|
|
85
|
+
print("使用 Pedalboard 的谐波失真增强完成!")
|
neverlib/audio_aug/__init__.py
CHANGED
|
@@ -1,12 +1,30 @@
|
|
|
1
|
-
#
|
|
2
|
-
#
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
1
|
+
# This file is auto-generated. Do NOT edit manually.
|
|
2
|
+
# Generated by neverlib.QA.gen_init
|
|
3
|
+
from lazy_loader import attach
|
|
4
|
+
|
|
5
|
+
__getattr__, __dir__, __all__ = attach(
|
|
6
|
+
__name__,
|
|
7
|
+
submodules=[
|
|
8
|
+
"HarmonicDistortion",
|
|
9
|
+
"TFMask",
|
|
10
|
+
"audio_aug",
|
|
11
|
+
"clip_aug",
|
|
12
|
+
"coder_aug",
|
|
13
|
+
"coder_aug2",
|
|
14
|
+
"loss_packet_aug",
|
|
15
|
+
"quant_aug",
|
|
16
|
+
],
|
|
17
|
+
submod_attrs={
|
|
18
|
+
"HarmonicDistortion": ['apply_harmonic_distortion', 'apply_pedalboard_distortion'],
|
|
19
|
+
"TFMask": ['FreqMask', 'TimeMask'],
|
|
20
|
+
"audio_aug": ['add_reverb', 'get_snr_use_vad', 'limiter', 'measure_loudness', 'snr_aug_Interpolation',
|
|
21
|
+
'snr_aug_changeClean', 'snr_aug_changeNoise', 'snr_aug_vad_Interpolation',
|
|
22
|
+
'snr_diff_changeClean', 'snr_diff_changeNoise', 'volume_aug', 'volume_aug_dbrms',
|
|
23
|
+
'volume_aug_linmax', 'volume_aug_lufs', 'volume_convert', 'volume_norm'],
|
|
24
|
+
"clip_aug": ['clipping_aug'],
|
|
25
|
+
"coder_aug": ['aac_aug_save', 'amr_nb_aug', 'amr_wb_aug', 'flac_aug', 'flac_encode_save', 'mp3_aug', 'opus_aug_save', 'vorbis_aug'],
|
|
26
|
+
"coder_aug2": ['apply_codec_distortion', 'check_codec_available'],
|
|
27
|
+
"loss_packet_aug": ['simulate_packet_loss_vectorized'],
|
|
28
|
+
"quant_aug": ['apply_mulaw_quantization', 'apply_uniform_quantization'],
|
|
29
|
+
}
|
|
30
|
+
)
|
neverlib/audio_aug/audio_aug.py
CHANGED
|
@@ -6,10 +6,9 @@
|
|
|
6
6
|
"""
|
|
7
7
|
import random
|
|
8
8
|
import numpy as np
|
|
9
|
-
import soundfile as sf
|
|
10
9
|
from scipy import signal
|
|
11
|
-
from
|
|
12
|
-
from
|
|
10
|
+
from ..utils import EPS
|
|
11
|
+
from ..filter import HPFilter
|
|
13
12
|
|
|
14
13
|
|
|
15
14
|
def limiter(wav, threshold=0.999):
|
|
@@ -299,18 +298,18 @@ def volume_aug_linmax(wav, target_level, hpf=False, sr=16000, order=4, cutoff=10
|
|
|
299
298
|
return wav_opt
|
|
300
299
|
|
|
301
300
|
|
|
302
|
-
|
|
301
|
+
# 注意: 避免在模块导入阶段引入可选依赖 pyloudnorm
|
|
303
302
|
|
|
304
303
|
def volume_aug_lufs(wav, target_lufs, hpf=False, sr=16000, order=4, cutoff=100):
|
|
305
304
|
"""
|
|
306
305
|
音量增强, 使用lufs方法,
|
|
307
306
|
LUFS是“感知响度” → 跟人耳听感对齐,而且符合国际响度标准。
|
|
308
|
-
|
|
307
|
+
|
|
309
308
|
LUFS 使用 感知加权(K-weighting)
|
|
310
309
|
- 高频增强(模拟人耳在 3~6kHz 的敏感)
|
|
311
310
|
- 低频衰减(降低 <100Hz 对响度的影响)。
|
|
312
311
|
使用 短时块(400ms)能量 + 响度门限(-70 LUFS) 过滤极静音段。
|
|
313
|
-
|
|
312
|
+
|
|
314
313
|
Args:
|
|
315
314
|
wav: 音频
|
|
316
315
|
target_lufs: 目标音量, 单位lufs
|
|
@@ -318,7 +317,7 @@ def volume_aug_lufs(wav, target_lufs, hpf=False, sr=16000, order=4, cutoff=100):
|
|
|
318
317
|
sr: 采样率
|
|
319
318
|
order: 滤波器阶数
|
|
320
319
|
cutoff: 截止频率
|
|
321
|
-
|
|
320
|
+
|
|
322
321
|
补充信息:
|
|
323
322
|
## 推荐的 target_lufs 值(行业参考)
|
|
324
323
|
平台 推荐目标 LUFS
|
|
@@ -328,6 +327,11 @@ def volume_aug_lufs(wav, target_lufs, hpf=False, sr=16000, order=4, cutoff=100):
|
|
|
328
327
|
游戏音频 -16 ~ -18
|
|
329
328
|
有声书 -18 ~ -20
|
|
330
329
|
"""
|
|
330
|
+
try:
|
|
331
|
+
import pyloudnorm as pyln
|
|
332
|
+
except Exception as e:
|
|
333
|
+
raise ImportError("需要安装 pyloudnorm 才能使用 volume_aug_lufs: pip install pyloudnorm") from e
|
|
334
|
+
|
|
331
335
|
wav_tmp = wav.copy()
|
|
332
336
|
if hpf:
|
|
333
337
|
wav_tmp = HPFilter(wav_tmp, sr=sr, order=4, cutoff=1000)
|
|
@@ -336,16 +340,17 @@ def volume_aug_lufs(wav, target_lufs, hpf=False, sr=16000, order=4, cutoff=100):
|
|
|
336
340
|
meter = pyln.Meter(sr, block_size=0.400) # block_size=400ms
|
|
337
341
|
|
|
338
342
|
# Step3: 测量当前 LUFS
|
|
339
|
-
loudness = meter.integrated_loudness(wav_tmp)
|
|
343
|
+
loudness = meter.integrated_loudness(wav_tmp)
|
|
340
344
|
|
|
341
345
|
# Step4: 计算增益并应用
|
|
342
346
|
loudness_diff = target_lufs - loudness
|
|
343
347
|
scalar = 10 ** (loudness_diff / 20.0)
|
|
344
348
|
wav_opt = wav * scalar
|
|
345
349
|
|
|
346
|
-
wav_opt = limiter(wav_opt, threshold=0.999)
|
|
350
|
+
wav_opt = limiter(wav_opt, threshold=0.999) # Step5: 限幅
|
|
347
351
|
return wav_opt
|
|
348
352
|
|
|
353
|
+
|
|
349
354
|
def measure_loudness(wav, sr):
|
|
350
355
|
"""
|
|
351
356
|
测量音频的 Peak / RMS / LUFS,以及峰均比(Crest Factor)
|
|
@@ -384,10 +389,10 @@ def measure_loudness(wav, sr):
|
|
|
384
389
|
"lufs": loudness_lufs,
|
|
385
390
|
"crest_factor_db": crest_factor_db
|
|
386
391
|
}
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
def volume_convert(value,
|
|
390
|
-
from_unit="linear", to_unit="dBFS",
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
def volume_convert(value,
|
|
395
|
+
from_unit="linear", to_unit="dBFS",
|
|
391
396
|
crest_factor_db=None, lufs_offset=None):
|
|
392
397
|
"""
|
|
393
398
|
音量单位转换函数
|
|
@@ -442,4 +447,4 @@ def volume_convert(value,
|
|
|
442
447
|
rms_dbfs = 20 * np.log10(lin_val + EPS)
|
|
443
448
|
return rms_dbfs - lufs_offset
|
|
444
449
|
else:
|
|
445
|
-
raise ValueError(f"未知单位:{to_unit}")
|
|
450
|
+
raise ValueError(f"未知单位:{to_unit}")
|
neverlib/audio_aug/clip_aug.py
CHANGED
|
@@ -3,13 +3,9 @@ Author: 凌逆战 | Never
|
|
|
3
3
|
Date: 2025-07-29 17:06:28
|
|
4
4
|
Description:
|
|
5
5
|
'''
|
|
6
|
-
import
|
|
7
|
-
sys.path.append("..")
|
|
8
|
-
import librosa
|
|
6
|
+
import random
|
|
9
7
|
import numpy as np
|
|
10
8
|
import soundfile as sf
|
|
11
|
-
import random
|
|
12
|
-
from audiomentations import Compose, ClippingDistortion
|
|
13
9
|
|
|
14
10
|
|
|
15
11
|
def clipping_aug(wav):
|
|
@@ -17,25 +13,26 @@ def clipping_aug(wav):
|
|
|
17
13
|
模拟录音设备或音频处理设备的动态范围限制
|
|
18
14
|
"""
|
|
19
15
|
wav = wav / np.max(np.abs(wav)) # 归一化
|
|
20
|
-
gain = random.uniform(1.0, 2)
|
|
16
|
+
gain = random.uniform(1.0, 2) # 增益
|
|
21
17
|
wav = wav * gain
|
|
22
18
|
wav = np.clip(wav, -1.0, 1.0)
|
|
23
19
|
|
|
24
20
|
return wav
|
|
25
21
|
|
|
26
22
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
wav =
|
|
30
|
-
|
|
23
|
+
if __name__ == "__main__":
|
|
24
|
+
wav_path = "/data/never/Desktop/kws_train/QA/wav_data/TIMIT.wav"
|
|
25
|
+
wav, wav_sr = sf.read(wav_path, always_2d=True)
|
|
26
|
+
wav = wav.T
|
|
27
|
+
print(wav.shape)
|
|
31
28
|
|
|
32
|
-
# 应用削波增强
|
|
33
|
-
# 我们让削波阈值在音频振幅的50%到75%之间随机选择
|
|
34
|
-
# 这意味着信号中最响亮的25%到50%的部分将被削平
|
|
35
|
-
y_clipped = clipping_aug(wav, wav_sr, min_percentile=50, max_percentile=75)
|
|
29
|
+
# 应用削波增强
|
|
30
|
+
# 我们让削波阈值在音频振幅的50%到75%之间随机选择
|
|
31
|
+
# 这意味着信号中最响亮的25%到50%的部分将被削平
|
|
32
|
+
y_clipped = clipping_aug(wav, wav_sr, min_percentile=50, max_percentile=75)
|
|
36
33
|
|
|
37
|
-
# 保存增强后的音频
|
|
38
|
-
output_path = './augmented_clipped.wav'
|
|
39
|
-
sf.write(output_path, y_clipped.T, wav_sr)
|
|
34
|
+
# 保存增强后的音频
|
|
35
|
+
output_path = './augmented_clipped.wav'
|
|
36
|
+
sf.write(output_path, y_clipped.T, wav_sr)
|
|
40
37
|
|
|
41
|
-
print(f"削波增强完成!增强后的音频已保存至: {output_path}")
|
|
38
|
+
print(f"削波增强完成!增强后的音频已保存至: {output_path}")
|
neverlib/audio_aug/coder_aug.py
CHANGED
|
@@ -20,16 +20,20 @@ AMR (Adaptive Multi-Rate)
|
|
|
20
20
|
- 压缩特性:严格为语音设计, 会滤除大部分非语音频率(如音乐), 导致音乐听起来“电话音”效果。
|
|
21
21
|
- 数据增强目的:固定采样率:AMR-NB (窄带) 为 8kHz, AMR-WB (宽带) 为 16kHz。这一点至关重要!
|
|
22
22
|
"""
|
|
23
|
+
import random
|
|
23
24
|
import numpy as np
|
|
24
25
|
import soundfile as sf
|
|
25
|
-
from audiomentations import Mp3Compression
|
|
26
|
-
import av
|
|
27
|
-
import random
|
|
28
|
-
|
|
29
|
-
# mp3编解码数据增强
|
|
30
26
|
|
|
31
27
|
|
|
32
28
|
def mp3_aug(wav, sr):
|
|
29
|
+
# mp3编解码数据增强
|
|
30
|
+
try:
|
|
31
|
+
from audiomentations import Mp3Compression
|
|
32
|
+
except ImportError:
|
|
33
|
+
raise ImportError(
|
|
34
|
+
"audiomentations is required for mp3_aug(). "
|
|
35
|
+
"Please install it via `pip install audiomentations`.")
|
|
36
|
+
|
|
33
37
|
# return Mp3Compression(min_bitrate=64, max_bitrate=192, p=1.0)(samples, sample_rate)
|
|
34
38
|
return sf.write('audio.mp3', wav, sr, format='MP3', bitrate='192k')
|
|
35
39
|
|
|
@@ -47,11 +51,20 @@ def opus_aug_save(wav: np.ndarray, sr: int, output_filepath: str):
|
|
|
47
51
|
对音频进行 Opus 压缩, 并直接保存到文件。
|
|
48
52
|
使用 PyAV 实现, 比特率是随机的。
|
|
49
53
|
"""
|
|
54
|
+
try:
|
|
55
|
+
import av
|
|
56
|
+
except ImportError:
|
|
57
|
+
raise ImportError("av is required for opus_aug_save(). "
|
|
58
|
+
"Please install it via `pip install av`.")
|
|
59
|
+
|
|
50
60
|
# 随机选择一个比特率 (kbps)
|
|
51
61
|
bitrate_kbps = random.choice([24, 32, 48, 64, 96, 128])
|
|
52
|
-
output_filepath_with_bitrate = output_filepath.replace(
|
|
62
|
+
output_filepath_with_bitrate = output_filepath.replace(
|
|
63
|
+
'.opus', f'_{bitrate_kbps}k.opus')
|
|
53
64
|
|
|
54
|
-
print(
|
|
65
|
+
print(
|
|
66
|
+
f" -> Saving Opus augmented version to: {output_filepath_with_bitrate} (Bitrate: {bitrate_kbps}k)"
|
|
67
|
+
)
|
|
55
68
|
|
|
56
69
|
# PyAV 需要 (n_channels, n_samples) 格式
|
|
57
70
|
wav_ch_first = wav.T if wav.ndim > 1 else wav.reshape(1, -1)
|
|
@@ -81,19 +94,28 @@ def aac_aug_save(wav: np.ndarray, sr: int, output_filepath: str):
|
|
|
81
94
|
对音频进行 AAC 压缩, 并直接保存到文件。
|
|
82
95
|
使用 PyAV 实现, 比特率是随机的。
|
|
83
96
|
"""
|
|
97
|
+
try:
|
|
98
|
+
import av
|
|
99
|
+
except ImportError:
|
|
100
|
+
raise ImportError("av is required for aac_aug_save(). "
|
|
101
|
+
"Please install it via `pip install av`.")
|
|
84
102
|
# 随机选择一个比特率 (kbps)
|
|
85
103
|
bitrate_kbps = random.choice([48, 64, 96, 128, 160, 192])
|
|
86
104
|
# .m4a 是 AAC 更常用的文件后缀
|
|
87
|
-
output_filepath_with_bitrate = output_filepath.replace(
|
|
105
|
+
output_filepath_with_bitrate = output_filepath.replace(
|
|
106
|
+
'.aac', f'_{bitrate_kbps}k.m4a')
|
|
88
107
|
|
|
89
|
-
print(
|
|
108
|
+
print(
|
|
109
|
+
f" -> Saving AAC augmented version to: {output_filepath_with_bitrate} (Bitrate: {bitrate_kbps}k)"
|
|
110
|
+
)
|
|
90
111
|
|
|
91
112
|
# PyAV 需要 (n_channels, n_samples) 格式
|
|
92
113
|
wav_ch_first = wav.T if wav.ndim > 1 else wav.reshape(1, -1)
|
|
93
114
|
layout = 'stereo' if wav.ndim > 1 else 'mono'
|
|
94
115
|
|
|
95
116
|
# 注意:format='adts' 是原始 AAC 流, 'mp4' 会创建 .m4a/.mp4 容器
|
|
96
|
-
with av.open(output_filepath_with_bitrate, mode='w',
|
|
117
|
+
with av.open(output_filepath_with_bitrate, mode='w',
|
|
118
|
+
format='mp4') as container:
|
|
97
119
|
# 使用高质量的 fdk_aac 编码器
|
|
98
120
|
stream = container.add_stream('libfdk_aac', rate=sr, layout=layout)
|
|
99
121
|
stream.bit_rate = bitrate_kbps * 1000
|
|
@@ -111,13 +133,11 @@ def aac_aug_save(wav: np.ndarray, sr: int, output_filepath: str):
|
|
|
111
133
|
print(f" ... success.")
|
|
112
134
|
|
|
113
135
|
|
|
114
|
-
def flac_encode_save(
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
bits_per_sample=None
|
|
120
|
-
):
|
|
136
|
+
def flac_encode_save(wav: np.ndarray,
|
|
137
|
+
sr: int,
|
|
138
|
+
output_filepath: str,
|
|
139
|
+
compression_level: int = 5,
|
|
140
|
+
bits_per_sample=None):
|
|
121
141
|
"""
|
|
122
142
|
使用 pyFLAC 将 NumPy 音频数组编码为 FLAC 文件并保存。
|
|
123
143
|
|
|
@@ -164,14 +184,14 @@ def flac_encode_save(
|
|
|
164
184
|
else:
|
|
165
185
|
wav_int = wav
|
|
166
186
|
else:
|
|
167
|
-
raise ValueError(
|
|
187
|
+
raise ValueError(
|
|
188
|
+
f"Unsupported bits_per_sample: {bits_per_sample}. Must be 16, 24, or 32."
|
|
189
|
+
)
|
|
168
190
|
|
|
169
191
|
# --- 2. 初始化编码器 ---
|
|
170
|
-
encoder = Encoder(
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
compression_level=compression_level
|
|
174
|
-
)
|
|
192
|
+
encoder = Encoder(sample_rate=sr,
|
|
193
|
+
bits_per_sample=bits_per_sample,
|
|
194
|
+
compression_level=compression_level)
|
|
175
195
|
|
|
176
196
|
# --- 3. 处理数据并获取编码后的字节 ---
|
|
177
197
|
# Encoder.process() 可以分块处理, 但对于中等长度的音频, 一次性处理更简单
|
|
@@ -196,8 +216,8 @@ def amr_wb_aug(wav, sr):
|
|
|
196
216
|
# return ApplyCodec(encoder="libamr_wb", p=1.0)(samples, sample_rate)
|
|
197
217
|
return sf.write('audio.amr', wav, sr, format='AMR', bitrate='192k')
|
|
198
218
|
|
|
199
|
-
# Opus 编解码数据增强
|
|
200
219
|
|
|
220
|
+
# Opus 编解码数据增强
|
|
201
221
|
|
|
202
222
|
if __name__ == "__main__":
|
|
203
223
|
wav_path = "/data/never/Desktop/kws_train/QA/wav_data/TIMIT.wav"
|