neverlib 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. neverlib/.claude/settings.local.json +9 -0
  2. neverlib/Docs/audio_aug/test_volume.ipynb +416 -0
  3. neverlib/Docs/audio_aug_test/test_volume.ipynb +289 -0
  4. neverlib/Docs/filter/biquad.ipynb +129 -0
  5. neverlib/Docs/filter/filter_family.ipynb +450 -0
  6. neverlib/Docs/filter/highpass.ipynb +139 -0
  7. neverlib/Docs/filter/scipy_filter_family.ipynb +110 -0
  8. neverlib/Docs/vad/VAD_Energy.ipynb +167 -0
  9. neverlib/Docs/vad/VAD_Silero.ipynb +325 -0
  10. neverlib/Docs/vad/VAD_WebRTC.ipynb +189 -0
  11. neverlib/Docs/vad/VAD_funasr.ipynb +192 -0
  12. neverlib/Docs/vad/VAD_rvADfast.ipynb +162 -0
  13. neverlib/Docs/vad/VAD_statistics.ipynb +532 -0
  14. neverlib/Docs/vad/VAD_tenVAD.ipynb +292 -0
  15. neverlib/Docs/vad/VAD_vadlib.ipynb +168 -0
  16. neverlib/Docs/vad/VAD_whisper.ipynb +404 -0
  17. neverlib/QA/gen_init.py +117 -0
  18. neverlib/QA/get_fun.py +19 -0
  19. neverlib/__init__.py +21 -4
  20. neverlib/audio_aug/HarmonicDistortion.py +19 -13
  21. neverlib/audio_aug/__init__.py +30 -12
  22. neverlib/audio_aug/audio_aug.py +19 -14
  23. neverlib/audio_aug/clip_aug.py +15 -18
  24. neverlib/audio_aug/coder_aug.py +44 -24
  25. neverlib/audio_aug/coder_aug2.py +54 -37
  26. neverlib/audio_aug/loss_packet_aug.py +7 -7
  27. neverlib/audio_aug/quant_aug.py +19 -17
  28. neverlib/data/000_short_enhance.wav +0 -0
  29. neverlib/data/3956_speech.wav +0 -0
  30. neverlib/data/3956_sweep.wav +0 -0
  31. neverlib/data/vad_example.wav +0 -0
  32. neverlib/data/white.wav +0 -0
  33. neverlib/data/white_EQ.wav +0 -0
  34. neverlib/data/white_matched.wav +0 -0
  35. neverlib/data_analyze/__init__.py +25 -20
  36. neverlib/data_analyze/dataset_analyzer.py +109 -114
  37. neverlib/data_analyze/quality_metrics.py +87 -89
  38. neverlib/data_analyze/rms_distrubution.py +23 -42
  39. neverlib/data_analyze/spectral_analysis.py +43 -46
  40. neverlib/data_analyze/statistics.py +76 -76
  41. neverlib/data_analyze/temporal_features.py +15 -6
  42. neverlib/data_analyze/visualization.py +208 -144
  43. neverlib/filter/__init__.py +17 -20
  44. neverlib/filter/auto_eq/__init__.py +18 -35
  45. neverlib/filter/auto_eq/de_eq.py +0 -2
  46. neverlib/filter/common.py +24 -5
  47. neverlib/metrics/DNSMOS/bak_ovr.onnx +0 -0
  48. neverlib/metrics/DNSMOS/model_v8.onnx +0 -0
  49. neverlib/metrics/DNSMOS/sig.onnx +0 -0
  50. neverlib/metrics/DNSMOS/sig_bak_ovr.onnx +0 -0
  51. neverlib/metrics/__init__.py +23 -0
  52. neverlib/metrics/dnsmos.py +4 -15
  53. neverlib/metrics/pDNSMOS/sig_bak_ovr.onnx +0 -0
  54. neverlib/metrics/pesq_c/PESQ +0 -0
  55. neverlib/metrics/pesq_c/dsp.c +553 -0
  56. neverlib/metrics/pesq_c/dsp.h +138 -0
  57. neverlib/metrics/pesq_c/pesq.h +294 -0
  58. neverlib/metrics/pesq_c/pesqdsp.c +1047 -0
  59. neverlib/metrics/pesq_c/pesqio.c +392 -0
  60. neverlib/metrics/pesq_c/pesqmain.c +610 -0
  61. neverlib/metrics/pesq_c/pesqmod.c +1417 -0
  62. neverlib/metrics/pesq_c/pesqpar.h +297 -0
  63. neverlib/metrics/snr.py +5 -1
  64. neverlib/metrics/spec.py +31 -21
  65. neverlib/metrics/test_pesq.py +0 -4
  66. neverlib/tests/test_imports.py +17 -0
  67. neverlib/utils/__init__.py +26 -15
  68. neverlib/utils/audio_split.py +5 -1
  69. neverlib/utils/checkGPU.py +17 -9
  70. neverlib/utils/lazy_expose.py +29 -0
  71. neverlib/utils/utils.py +40 -12
  72. neverlib/vad/__init__.py +33 -25
  73. neverlib/vad/class_get_speech.py +1 -1
  74. neverlib/vad/class_vad.py +3 -3
  75. neverlib/vad/img.png +0 -0
  76. {neverlib-0.2.5.dist-info → neverlib-0.2.7.dist-info}/METADATA +20 -17
  77. {neverlib-0.2.5.dist-info → neverlib-0.2.7.dist-info}/RECORD +80 -37
  78. {neverlib-0.2.5.dist-info → neverlib-0.2.7.dist-info}/WHEEL +0 -0
  79. {neverlib-0.2.5.dist-info → neverlib-0.2.7.dist-info}/licenses/LICENSE +0 -0
  80. {neverlib-0.2.5.dist-info → neverlib-0.2.7.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,117 @@
1
+ # 自动生成指定包目录下的 __init__.py(懒加载格式:lazy_loader.attach)
2
+
3
+ import ast
4
+ from pathlib import Path
5
+ from typing import Dict, List, Set
6
+
7
+
8
+ def _extract_exports_from_module(py_file: Path) -> List[str]:
9
+ """
10
+ 优先读取模块内显式定义的 __all__,否则收集顶层的类与函数名(排除以下划线开头的)。
11
+ """
12
+ try:
13
+ source = py_file.read_text(encoding='utf-8')
14
+ except UnicodeDecodeError:
15
+ source = py_file.read_text(errors='ignore')
16
+
17
+ try:
18
+ tree = ast.parse(source, filename=str(py_file))
19
+ except SyntaxError:
20
+ return []
21
+
22
+ # 1) 如果定义了 __all__ 则优先使用
23
+ for node in tree.body:
24
+ if isinstance(node, ast.Assign):
25
+ for target in node.targets:
26
+ if isinstance(target, ast.Name) and target.id == '__all__':
27
+ values: List[str] = []
28
+ if isinstance(node.value, (ast.List, ast.Tuple)):
29
+ for elt in node.value.elts:
30
+ if isinstance(elt, ast.Str): # py<3.8
31
+ values.append(elt.s)
32
+ elif isinstance(elt, ast.Constant) and isinstance(elt.value, str):
33
+ values.append(elt.value)
34
+ return [name for name in values if name and not name.startswith('_')]
35
+
36
+ # 2) 否则收集顶层函数与类名
37
+ exports: Set[str] = set()
38
+ for node in tree.body:
39
+ if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
40
+ name = node.name
41
+ if not name.startswith('_'):
42
+ exports.add(name)
43
+ return sorted(exports)
44
+
45
+
46
def generate_init_for_directory(package_dir: Path) -> Path:
    """Write a lazy-loading ``__init__.py`` for ``package_dir`` (overwrites).

    Only ``*.py`` files directly inside the directory are scanned; files whose
    name starts with an underscore (which includes ``__init__.py`` itself) are
    skipped. The generated file has this shape::

        from lazy_loader import attach

        __getattr__, __dir__, __all__ = attach(
            __name__,
            submodules=["m1", "m2", ...],
            submod_attrs={"m1": ['A', 'B'], ...}
        )

    Args:
        package_dir: Directory of the package to generate the file for.

    Returns:
        Path of the written ``__init__.py``.

    Raises:
        ValueError: If ``package_dir`` is not a directory.
    """
    # Local import: only needed to emit safely quoted string literals.
    import json

    package_dir = package_dir.resolve()
    if not package_dir.is_dir():
        raise ValueError(f"目标不是目录: {package_dir}")

    module_to_exports: Dict[str, List[str]] = {}
    module_names: List[str] = []
    for py_file in package_dir.glob('*.py'):
        # Covers private modules and __init__.py in one check.
        if py_file.name.startswith('_'):
            continue
        module_name = py_file.stem
        module_names.append(module_name)
        exports = _extract_exports_from_module(py_file)
        if exports:
            module_to_exports[module_name] = exports
    module_names.sort()

    lines: List[str] = [
        '# This file is auto-generated. Do NOT edit manually.',
        '# Generated by neverlib.QA.gen_init',
        'from lazy_loader import attach',
        '',
        '__getattr__, __dir__, __all__ = attach(',
        '    __name__,',
        '    submodules=[',
    ]
    # json.dumps / repr escape quotes and backslashes, so the generated file
    # stays valid Python whatever characters a name contains. For ordinary
    # identifiers the output is the plain "name" / 'name' form.
    for name in module_names:
        lines.append(f'        {json.dumps(name, ensure_ascii=False)},')
    lines.append('    ],')
    lines.append('    submod_attrs={')
    for module_name in sorted(module_to_exports):
        joined = ', '.join(repr(e) for e in sorted(module_to_exports[module_name]))
        lines.append(f'        {json.dumps(module_name, ensure_ascii=False)}: [{joined}],')
    lines.append('    }')
    lines.append(')')
    lines.append('')  # yields the trailing newline after the join below

    init_file = package_dir / '__init__.py'
    init_file.write_text('\n'.join(lines), encoding='utf-8')
    return init_file
99
+
100
+
101
def main():
    """Command-line entry point: generate ``__init__.py`` for one package directory.

    The directory may be given positionally or via ``-p/--package-dir``; the
    positional form wins when both are present. Without any argument the
    relative default ``../../neverlib/vad`` is used, which
    ``generate_init_for_directory`` resolves against the current working
    directory.
    """
    import argparse

    parser = argparse.ArgumentParser(description='为指定包目录自动生成 __init__.py')
    # Optional positional form, so the documented invocation
    # ``python -m neverlib.QA.gen_init <dir>`` is accepted instead of being
    # rejected by argparse as an unrecognized argument.
    parser.add_argument('positional_dir', nargs='?', default=None,
                        metavar='PACKAGE_DIR',
                        help='包目录路径(位置参数, 优先于 -p/--package-dir)')
    parser.add_argument('-p', '--package-dir', dest='package_dir',
                        default='../../neverlib/vad',
                        help='包目录路径,比如 /path/to/pkg 或 ./neverlib/utils')
    args = parser.parse_args()

    target_dir = Path(args.positional_dir or args.package_dir)
    init_path = generate_init_for_directory(target_dir)
    print(f'已生成: {init_path}')
    # Examples:
    #   python -m neverlib.QA.gen_init /data03/never/Desktop/neverlib/neverlib/utils
    #   python -m neverlib.QA.gen_init -p ./neverlib/utils


if __name__ == '__main__':
    main()
neverlib/QA/get_fun.py ADDED
@@ -0,0 +1,19 @@
1
+ # 获取一个python脚本里面所有的函数名
2
+
3
+ import ast
4
+ from typing import List
5
+
6
+
7
def get_function_names(file_path: str) -> List[str]:
    """Collect the name of every function defined in a Python source file.

    The file is parsed with ``ast`` (not imported) and the whole tree is
    walked, so nested functions and methods are included alongside top-level
    ones, for both ``def`` and ``async def``. Names appear in the order
    ``ast.walk`` visits the nodes.

    Args:
        file_path: Path of the UTF-8 encoded Python file to inspect.

    Returns:
        The function names found in the file.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        tree = ast.parse(handle.read(), filename=file_path)

    function_types = (ast.FunctionDef, ast.AsyncFunctionDef)
    return [item.name for item in ast.walk(tree) if isinstance(item, function_types)]


if __name__ == '__main__':
    print(get_function_names('../utils/checkGPU.py'))
neverlib/__init__.py CHANGED
@@ -1,15 +1,31 @@
1
1
  '''
2
2
  Author: 凌逆战 | Never
3
- Date: 2025-08-22
3
+ Date: 2025-09-07
4
4
  Description: neverlib - 音频处理和VAD工具集
5
+
6
+ 这是一个提供音频处理、增强、分析和语音活动检测(VAD)功能的Python库。
7
+ 该库使用懒加载机制,可以根据需要导入模块,提高启动速度并减少内存占用。
8
+
9
+ 主要功能模块:
10
+ - utils: 实用工具函数
11
+ - vad: 语音活动检测
12
+ - audio_aug: 音频增强和数据增广
13
+ - filter: 滤波和音频处理
14
+ - data_analyze: 数据分析工具
15
+ - metrics: 音频质量评估指标
16
+
17
+ 注意: 所有功能需要通过具体子模块导入,例如:
18
+ from neverlib.audio_aug import limiter
19
+ from neverlib.vad import EnergyVad_C
20
+ from neverlib.filter import HPFilter
5
21
  '''
6
22
  try:
7
23
  import re
8
24
  import pathlib
9
-
25
+
10
26
  # 获取pyproject.toml的路径
11
27
  _pyproject_path = pathlib.Path(__file__).parent.parent / "pyproject.toml"
12
-
28
+
13
29
  # 读取版本号
14
30
  if _pyproject_path.exists():
15
31
  with open(_pyproject_path, "r", encoding="utf-8") as f:
@@ -27,5 +43,6 @@ from lazy_loader import attach
27
43
 
28
44
  __getattr__, __dir__, __all__ = attach(
29
45
  __name__,
30
- submodules=["utils", "vad", "audio_aug", "filter", "data_analyze"],
46
+ submodules=["audio_aug", "data_analyze", "filter", "metrics", "utils", "vad", ],
47
+ # 只导出子模块,不直接导出函数
31
48
  )
@@ -3,10 +3,8 @@ Author: 凌逆战 | Never
3
3
  Date: 2025-07-29 17:49:25
4
4
  Description:
5
5
  '''
6
- import pedalboard as pdb
7
- import librosa
8
- import soundfile as sf
9
6
  import numpy as np
7
+ import soundfile as sf
10
8
 
11
9
 
12
10
  def apply_harmonic_distortion(wav, drive=1.0, mix=1.0):
@@ -54,11 +52,15 @@ def apply_pedalboard_distortion(wav, sr, drive_db=15.0):
54
52
  sr (int): 采样率。
55
53
  drive_db (float): 驱动增益, 单位是分贝(dB)。值越大失真越严重。
56
54
  """
55
+ try:
56
+ import pedalboard as pdb
57
+ except ImportError:
58
+ raise ImportError(
59
+ "pedalboard is required for apply_pedalboard_distortion(). "
60
+ "Please install it via `pip install pedalboard`.")
57
61
  # 1. 创建一个效果器处理板 (Pedalboard)
58
62
  # 这里只放一个 Distortion 效果器
59
- board = pdb.Pedalboard([
60
- pdb.Distortion(drive_db=drive_db)
61
- ])
63
+ board = pdb.Pedalboard([pdb.Distortion(drive_db=drive_db)])
62
64
 
63
65
  # 2. 处理音频
64
66
  # pedalboard 要求输入是 (channels, samples) 或 (samples,)
@@ -68,12 +70,16 @@ def apply_pedalboard_distortion(wav, sr, drive_db=15.0):
68
70
  return distorted_wav
69
71
 
70
72
 
71
- # --- 使用示例 ---
72
- y, sr = librosa.load('your_audio.wav', sr=None)
73
if __name__ == "__main__":
    # --- Usage example ---
    # soundfile.read() takes no `sr` keyword (passing one raises TypeError);
    # it returns the data together with the file's own sample rate.
    y, sr = sf.read('your_audio.wav')

    # Simulate a moderate amount of overdrive distortion
    drive_db_amount = 25.0
    y_pb_distorted = apply_pedalboard_distortion(y, sr, drive_db=drive_db_amount)

    sf.write('augmented_pedalboard_distortion.wav', y_pb_distorted, sr)
    print("使用 Pedalboard 的谐波失真增强完成!")
@@ -1,12 +1,30 @@
1
- # -*- coding:utf-8 -*-
2
- # Author:凌逆战 | Never
3
- # Date: 2024/5/17
4
- """
5
- 音频增强模块
6
- """
7
- from lazy_loader import attach
8
-
9
- __getattr__, __dir__, __all__ = attach(
10
- __name__,
11
- submodules=["audio_aug"],
12
- )
1
+ # This file is auto-generated. Do NOT edit manually.
2
+ # Generated by neverlib.QA.gen_init
3
+ from lazy_loader import attach
4
+
5
+ __getattr__, __dir__, __all__ = attach(
6
+ __name__,
7
+ submodules=[
8
+ "HarmonicDistortion",
9
+ "TFMask",
10
+ "audio_aug",
11
+ "clip_aug",
12
+ "coder_aug",
13
+ "coder_aug2",
14
+ "loss_packet_aug",
15
+ "quant_aug",
16
+ ],
17
+ submod_attrs={
18
+ "HarmonicDistortion": ['apply_harmonic_distortion', 'apply_pedalboard_distortion'],
19
+ "TFMask": ['FreqMask', 'TimeMask'],
20
+ "audio_aug": ['add_reverb', 'get_snr_use_vad', 'limiter', 'measure_loudness', 'snr_aug_Interpolation',
21
+ 'snr_aug_changeClean', 'snr_aug_changeNoise', 'snr_aug_vad_Interpolation',
22
+ 'snr_diff_changeClean', 'snr_diff_changeNoise', 'volume_aug', 'volume_aug_dbrms',
23
+ 'volume_aug_linmax', 'volume_aug_lufs', 'volume_convert', 'volume_norm'],
24
+ "clip_aug": ['clipping_aug'],
25
+ "coder_aug": ['aac_aug_save', 'amr_nb_aug', 'amr_wb_aug', 'flac_aug', 'flac_encode_save', 'mp3_aug', 'opus_aug_save', 'vorbis_aug'],
26
+ "coder_aug2": ['apply_codec_distortion', 'check_codec_available'],
27
+ "loss_packet_aug": ['simulate_packet_loss_vectorized'],
28
+ "quant_aug": ['apply_mulaw_quantization', 'apply_uniform_quantization'],
29
+ }
30
+ )
@@ -6,10 +6,9 @@
6
6
  """
7
7
  import random
8
8
  import numpy as np
9
- import soundfile as sf
10
9
  from scipy import signal
11
- from neverlib.utils import EPS
12
- from neverlib.filter import HPFilter
10
+ from ..utils import EPS
11
+ from ..filter import HPFilter
13
12
 
14
13
 
15
14
  def limiter(wav, threshold=0.999):
@@ -299,18 +298,18 @@ def volume_aug_linmax(wav, target_level, hpf=False, sr=16000, order=4, cutoff=10
299
298
  return wav_opt
300
299
 
301
300
 
302
- import pyloudnorm as pyln
301
+ # 注意: 避免在模块导入阶段引入可选依赖 pyloudnorm
303
302
 
304
303
  def volume_aug_lufs(wav, target_lufs, hpf=False, sr=16000, order=4, cutoff=100):
305
304
  """
306
305
  音量增强, 使用lufs方法,
307
306
  LUFS是“感知响度” → 跟人耳听感对齐,而且符合国际响度标准。
308
-
307
+
309
308
  LUFS 使用 感知加权(K-weighting)
310
309
  - 高频增强(模拟人耳在 3~6kHz 的敏感)
311
310
  - 低频衰减(降低 <100Hz 对响度的影响)。
312
311
  使用 短时块(400ms)能量 + 响度门限(-70 LUFS) 过滤极静音段。
313
-
312
+
314
313
  Args:
315
314
  wav: 音频
316
315
  target_lufs: 目标音量, 单位lufs
@@ -318,7 +317,7 @@ def volume_aug_lufs(wav, target_lufs, hpf=False, sr=16000, order=4, cutoff=100):
318
317
  sr: 采样率
319
318
  order: 滤波器阶数
320
319
  cutoff: 截止频率
321
-
320
+
322
321
  补充信息:
323
322
  ## 推荐的 target_lufs 值(行业参考)
324
323
  平台 推荐目标 LUFS
@@ -328,6 +327,11 @@ def volume_aug_lufs(wav, target_lufs, hpf=False, sr=16000, order=4, cutoff=100):
328
327
  游戏音频 -16 ~ -18
329
328
  有声书 -18 ~ -20
330
329
  """
330
+ try:
331
+ import pyloudnorm as pyln
332
+ except Exception as e:
333
+ raise ImportError("需要安装 pyloudnorm 才能使用 volume_aug_lufs: pip install pyloudnorm") from e
334
+
331
335
  wav_tmp = wav.copy()
332
336
  if hpf:
333
337
  wav_tmp = HPFilter(wav_tmp, sr=sr, order=4, cutoff=1000)
@@ -336,16 +340,17 @@ def volume_aug_lufs(wav, target_lufs, hpf=False, sr=16000, order=4, cutoff=100):
336
340
  meter = pyln.Meter(sr, block_size=0.400) # block_size=400ms
337
341
 
338
342
  # Step3: 测量当前 LUFS
339
- loudness = meter.integrated_loudness(wav_tmp)
343
+ loudness = meter.integrated_loudness(wav_tmp)
340
344
 
341
345
  # Step4: 计算增益并应用
342
346
  loudness_diff = target_lufs - loudness
343
347
  scalar = 10 ** (loudness_diff / 20.0)
344
348
  wav_opt = wav * scalar
345
349
 
346
- wav_opt = limiter(wav_opt, threshold=0.999) # Step5: 限幅
350
+ wav_opt = limiter(wav_opt, threshold=0.999) # Step5: 限幅
347
351
  return wav_opt
348
352
 
353
+
349
354
  def measure_loudness(wav, sr):
350
355
  """
351
356
  测量音频的 Peak / RMS / LUFS,以及峰均比(Crest Factor)
@@ -384,10 +389,10 @@ def measure_loudness(wav, sr):
384
389
  "lufs": loudness_lufs,
385
390
  "crest_factor_db": crest_factor_db
386
391
  }
387
-
388
-
389
- def volume_convert(value,
390
- from_unit="linear", to_unit="dBFS",
392
+
393
+
394
+ def volume_convert(value,
395
+ from_unit="linear", to_unit="dBFS",
391
396
  crest_factor_db=None, lufs_offset=None):
392
397
  """
393
398
  音量单位转换函数
@@ -442,4 +447,4 @@ def volume_convert(value,
442
447
  rms_dbfs = 20 * np.log10(lin_val + EPS)
443
448
  return rms_dbfs - lufs_offset
444
449
  else:
445
- raise ValueError(f"未知单位:{to_unit}")
450
+ raise ValueError(f"未知单位:{to_unit}")
@@ -3,13 +3,9 @@ Author: 凌逆战 | Never
3
3
  Date: 2025-07-29 17:06:28
4
4
  Description:
5
5
  '''
6
- import sys
7
- sys.path.append("..")
8
- import librosa
6
+ import random
9
7
  import numpy as np
10
8
  import soundfile as sf
11
- import random
12
- from audiomentations import Compose, ClippingDistortion
13
9
 
14
10
 
15
11
def clipping_aug(wav):
    """Simulate the limited dynamic range of a recording/processing device.

    The signal is peak-normalized, amplified by a random gain drawn uniformly
    from [1.0, 2.0], and then hard-clipped to [-1.0, 1.0].

    Args:
        wav: Audio samples (array-like). The input is not modified.

    Returns:
        A new array of the same shape with values in [-1.0, 1.0]. An all-zero
        or empty input is returned as an unchanged copy instead of producing
        NaNs or raising.
    """
    wav = np.asarray(wav)
    if wav.size == 0:
        # np.max() raises on empty input; there is nothing to clip anyway.
        return wav.copy()

    peak = np.max(np.abs(wav))
    if peak > 0:
        wav = wav / peak  # peak-normalize; skipped for silence to avoid 0/0 -> NaN
    gain = random.uniform(1.0, 2)  # random over-drive gain
    wav = wav * gain
    wav = np.clip(wav, -1.0, 1.0)

    return wav
25
21
 
26
22
 
27
- wav_path = "/data/never/Desktop/kws_train/QA/wav_data/TIMIT.wav"
28
- wav, wav_sr = sf.read(wav_path, always_2d=True)
29
- wav = wav.T
30
- print(wav.shape)
23
if __name__ == "__main__":
    wav_path = "/data/never/Desktop/kws_train/QA/wav_data/TIMIT.wav"
    wav, wav_sr = sf.read(wav_path, always_2d=True)
    wav = wav.T
    print(wav.shape)

    # Apply the clipping augmentation. clipping_aug() accepts only the
    # waveform: it normalizes it, applies a random gain in [1.0, 2.0] and
    # hard-clips to [-1, 1]. There are no sample-rate or percentile arguments,
    # so passing them would raise TypeError.
    y_clipped = clipping_aug(wav)

    # Save the augmented audio
    output_path = './augmented_clipped.wav'
    sf.write(output_path, y_clipped.T, wav_sr)

    print(f"削波增强完成!增强后的音频已保存至: {output_path}")
@@ -20,16 +20,20 @@ AMR (Adaptive Multi-Rate)
20
20
  - 压缩特性:严格为语音设计, 会滤除大部分非语音频率(如音乐), 导致音乐听起来“电话音”效果。
21
21
  - 数据增强目的:固定采样率:AMR-NB (窄带) 为 8kHz, AMR-WB (宽带) 为 16kHz。这一点至关重要!
22
22
  """
23
+ import random
23
24
  import numpy as np
24
25
  import soundfile as sf
25
- from audiomentations import Mp3Compression
26
- import av
27
- import random
28
-
29
- # mp3编解码数据增强
30
26
 
31
27
 
32
28
  def mp3_aug(wav, sr):
29
+ # mp3编解码数据增强
30
+ try:
31
+ from audiomentations import Mp3Compression
32
+ except ImportError:
33
+ raise ImportError(
34
+ "audiomentations is required for mp3_aug(). "
35
+ "Please install it via `pip install audiomentations`.")
36
+
33
37
  # return Mp3Compression(min_bitrate=64, max_bitrate=192, p=1.0)(samples, sample_rate)
34
38
  return sf.write('audio.mp3', wav, sr, format='MP3', bitrate='192k')
35
39
 
@@ -47,11 +51,20 @@ def opus_aug_save(wav: np.ndarray, sr: int, output_filepath: str):
47
51
  对音频进行 Opus 压缩, 并直接保存到文件。
48
52
  使用 PyAV 实现, 比特率是随机的。
49
53
  """
54
+ try:
55
+ import av
56
+ except ImportError:
57
+ raise ImportError("av is required for opus_aug_save(). "
58
+ "Please install it via `pip install av`.")
59
+
50
60
  # 随机选择一个比特率 (kbps)
51
61
  bitrate_kbps = random.choice([24, 32, 48, 64, 96, 128])
52
- output_filepath_with_bitrate = output_filepath.replace('.opus', f'_{bitrate_kbps}k.opus')
62
+ output_filepath_with_bitrate = output_filepath.replace(
63
+ '.opus', f'_{bitrate_kbps}k.opus')
53
64
 
54
- print(f" -> Saving Opus augmented version to: {output_filepath_with_bitrate} (Bitrate: {bitrate_kbps}k)")
65
+ print(
66
+ f" -> Saving Opus augmented version to: {output_filepath_with_bitrate} (Bitrate: {bitrate_kbps}k)"
67
+ )
55
68
 
56
69
  # PyAV 需要 (n_channels, n_samples) 格式
57
70
  wav_ch_first = wav.T if wav.ndim > 1 else wav.reshape(1, -1)
@@ -81,19 +94,28 @@ def aac_aug_save(wav: np.ndarray, sr: int, output_filepath: str):
81
94
  对音频进行 AAC 压缩, 并直接保存到文件。
82
95
  使用 PyAV 实现, 比特率是随机的。
83
96
  """
97
+ try:
98
+ import av
99
+ except ImportError:
100
+ raise ImportError("av is required for aac_aug_save(). "
101
+ "Please install it via `pip install av`.")
84
102
  # 随机选择一个比特率 (kbps)
85
103
  bitrate_kbps = random.choice([48, 64, 96, 128, 160, 192])
86
104
  # .m4a 是 AAC 更常用的文件后缀
87
- output_filepath_with_bitrate = output_filepath.replace('.aac', f'_{bitrate_kbps}k.m4a')
105
+ output_filepath_with_bitrate = output_filepath.replace(
106
+ '.aac', f'_{bitrate_kbps}k.m4a')
88
107
 
89
- print(f" -> Saving AAC augmented version to: {output_filepath_with_bitrate} (Bitrate: {bitrate_kbps}k)")
108
+ print(
109
+ f" -> Saving AAC augmented version to: {output_filepath_with_bitrate} (Bitrate: {bitrate_kbps}k)"
110
+ )
90
111
 
91
112
  # PyAV 需要 (n_channels, n_samples) 格式
92
113
  wav_ch_first = wav.T if wav.ndim > 1 else wav.reshape(1, -1)
93
114
  layout = 'stereo' if wav.ndim > 1 else 'mono'
94
115
 
95
116
  # 注意:format='adts' 是原始 AAC 流, 'mp4' 会创建 .m4a/.mp4 容器
96
- with av.open(output_filepath_with_bitrate, mode='w', format='mp4') as container:
117
+ with av.open(output_filepath_with_bitrate, mode='w',
118
+ format='mp4') as container:
97
119
  # 使用高质量的 fdk_aac 编码器
98
120
  stream = container.add_stream('libfdk_aac', rate=sr, layout=layout)
99
121
  stream.bit_rate = bitrate_kbps * 1000
@@ -111,13 +133,11 @@ def aac_aug_save(wav: np.ndarray, sr: int, output_filepath: str):
111
133
  print(f" ... success.")
112
134
 
113
135
 
114
- def flac_encode_save(
115
- wav: np.ndarray,
116
- sr: int,
117
- output_filepath: str,
118
- compression_level: int = 5,
119
- bits_per_sample=None
120
- ):
136
+ def flac_encode_save(wav: np.ndarray,
137
+ sr: int,
138
+ output_filepath: str,
139
+ compression_level: int = 5,
140
+ bits_per_sample=None):
121
141
  """
122
142
  使用 pyFLAC 将 NumPy 音频数组编码为 FLAC 文件并保存。
123
143
 
@@ -164,14 +184,14 @@ def flac_encode_save(
164
184
  else:
165
185
  wav_int = wav
166
186
  else:
167
- raise ValueError(f"Unsupported bits_per_sample: {bits_per_sample}. Must be 16, 24, or 32.")
187
+ raise ValueError(
188
+ f"Unsupported bits_per_sample: {bits_per_sample}. Must be 16, 24, or 32."
189
+ )
168
190
 
169
191
  # --- 2. 初始化编码器 ---
170
- encoder = Encoder(
171
- sample_rate=sr,
172
- bits_per_sample=bits_per_sample,
173
- compression_level=compression_level
174
- )
192
+ encoder = Encoder(sample_rate=sr,
193
+ bits_per_sample=bits_per_sample,
194
+ compression_level=compression_level)
175
195
 
176
196
  # --- 3. 处理数据并获取编码后的字节 ---
177
197
  # Encoder.process() 可以分块处理, 但对于中等长度的音频, 一次性处理更简单
@@ -196,8 +216,8 @@ def amr_wb_aug(wav, sr):
196
216
  # return ApplyCodec(encoder="libamr_wb", p=1.0)(samples, sample_rate)
197
217
  return sf.write('audio.amr', wav, sr, format='AMR', bitrate='192k')
198
218
 
199
- # Opus 编解码数据增强
200
219
 
220
+ # Opus 编解码数据增强
201
221
 
202
222
  if __name__ == "__main__":
203
223
  wav_path = "/data/never/Desktop/kws_train/QA/wav_data/TIMIT.wav"