hyper-animator-codex 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +59 -0
- package/bin/hyper-animator-codex.mjs +16 -28
- package/lib/install-options.mjs +44 -0
- package/lib/install-skill.mjs +16 -0
- package/lib/minimax-config.mjs +162 -0
- package/package.json +1 -1
- package/skills/hyper-animator-codex/SKILL.md +16 -8
- package/skills/hyper-animator-codex/references/beat-sync-workflow.md +51 -0
- package/skills/hyper-animator-codex/references/minimax-music-workflow.md +77 -0
- package/skills/hyper-animator-codex/scripts/analyze_music_beats.py +78 -0
- package/skills/hyper-animator-codex/scripts/generate_minimax_music.mjs +346 -0
- package/skills/hyper-animator-codex/scripts/minimax_runtime_config.mjs +113 -0
- package/skills/hyper-animator-codex/vendor/music-beat-detector/README.md +13 -0
- package/skills/hyper-animator-codex/vendor/music-beat-detector/beat_detector/__init__.py +33 -0
- package/skills/hyper-animator-codex/vendor/music-beat-detector/beat_detector/analyzer.py +129 -0
- package/skills/hyper-animator-codex/vendor/music-beat-detector/beat_detector/beat.py +133 -0
- package/skills/hyper-animator-codex/vendor/music-beat-detector/beat_detector/cli.py +74 -0
- package/skills/hyper-animator-codex/vendor/music-beat-detector/beat_detector/errors.py +49 -0
- package/skills/hyper-animator-codex/vendor/music-beat-detector/beat_detector/structure.py +171 -0
- package/skills/hyper-animator-codex/vendor/music-beat-detector/beat_detector/utils.py +73 -0
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
"""节拍检测模块"""
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from typing import List, Callable, Optional
|
|
4
|
+
from enum import Enum
|
|
5
|
+
import numpy as np
|
|
6
|
+
import librosa
|
|
7
|
+
|
|
8
|
+
from .utils import ms_to_frame
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class EnergyLevel(Enum):
    """Energy tier assigned to a beat (weak / medium / strong)."""
    WEAK = "weak"
    MEDIUM = "medium"
    STRONG = "strong"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class Beat:
    """A single detected beat."""
    # Beat time in milliseconds (detect_beats truncates seconds * 1000 to int).
    time_ms: int
    # Frame number for time_ms at the caller-supplied fps (via ms_to_frame).
    frame: int
    # 1-based position of the beat inside its bar, as assigned by detect_beats.
    beat_in_bar: int
    # Energy tier of the beat; MEDIUM when not specified.
    energy_level: EnergyLevel = EnergyLevel.MEDIUM
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
class BeatResult:
    """Result of beat detection."""
    # Estimated tempo (first element of the tempo returned by librosa).
    bpm: float
    # Detected beats, in the order detect_beats produced them.
    beats: List[Beat]
    # detect_beats never sets this field, so it stays at the "4/4" default.
    time_signature: str = "4/4"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def detect_beats(
    y: np.ndarray,
    sr: int,
    fps: int = 30,
    on_progress: Optional[Callable[[float], None]] = None
) -> BeatResult:
    """Detect beats in an audio signal and grade each beat by local energy.

    Args:
        y: Audio samples handed to ``librosa.beat.beat_track``.
        sr: Sample rate of ``y``.
        fps: Frame rate used to convert beat times into frame numbers.
        on_progress: Optional callback; it is called with 10, 40, 70, 80
            and 100 as the individual stages finish.

    Returns:
        A ``BeatResult`` holding the estimated tempo and one ``Beat`` per
        detected beat. ``time_signature`` is left at its default.
    """
    def report(percent: float) -> None:
        # Progress reporting is optional; skip it when no callback is given.
        if on_progress:
            on_progress(percent)

    report(10.0)

    # Beat tracking with librosa.
    tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
    # The tempo is flattened first so a scalar and an array are handled alike.
    bpm = float(np.asarray(tempo).reshape(-1)[0])
    report(40.0)

    # Beat positions as integer milliseconds (fractions are truncated).
    stamps_ms = [
        int(seconds * 1000)
        for seconds in librosa.frames_to_time(beat_frames, sr=sr)
    ]

    # Local energy around every beat.
    energies = _compute_beat_energies(y, sr, beat_frames)
    report(70.0)

    # Map the energies onto the weak / medium / strong tiers.
    tiers = _classify_energy_levels(energies)
    report(80.0)

    # Beats are numbered 1..4 cyclically, i.e. a 4/4 metre is assumed and the
    # first detected beat is treated as beat 1 of a bar.
    beats_per_bar = 4
    beats = [
        Beat(
            time_ms=stamp,
            frame=ms_to_frame(stamp, fps),
            beat_in_bar=(index % beats_per_bar) + 1,
            energy_level=tiers[index],
        )
        for index, stamp in enumerate(stamps_ms)
    ]

    report(100.0)
    return BeatResult(bpm=bpm, beats=beats)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _compute_beat_energies(
|
|
89
|
+
y: np.ndarray,
|
|
90
|
+
sr: int,
|
|
91
|
+
beat_frames: np.ndarray,
|
|
92
|
+
window_ms: int = 50
|
|
93
|
+
) -> np.ndarray:
|
|
94
|
+
"""计算每个beat时刻的局部能量"""
|
|
95
|
+
window_samples = int(sr * window_ms / 1000)
|
|
96
|
+
energies = []
|
|
97
|
+
|
|
98
|
+
for frame in beat_frames:
|
|
99
|
+
# 将librosa帧转换为样本索引
|
|
100
|
+
sample = librosa.frames_to_samples(frame)
|
|
101
|
+
start = max(0, sample - window_samples // 2)
|
|
102
|
+
end = min(len(y), sample + window_samples // 2)
|
|
103
|
+
|
|
104
|
+
if end > start:
|
|
105
|
+
# 使用RMS作为能量度量
|
|
106
|
+
segment = y[start:end]
|
|
107
|
+
rms = np.sqrt(np.mean(segment ** 2))
|
|
108
|
+
energies.append(rms)
|
|
109
|
+
else:
|
|
110
|
+
energies.append(0.0)
|
|
111
|
+
|
|
112
|
+
return np.array(energies)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _classify_energy_levels(energies: np.ndarray) -> List[EnergyLevel]:
    """Bucket beat energies into weak / medium / strong tiers.

    The cut-offs are the 33rd and 66th percentiles of ``energies`` itself.
    The weak test runs first, so when both cut-offs coincide (for example
    when all energies are equal) every beat is graded ``WEAK``.

    Args:
        energies: One energy value per beat.

    Returns:
        A list of ``EnergyLevel`` values, one per input energy; empty when
        ``energies`` is empty.
    """
    if len(energies) == 0:
        return []

    # Percentile-based thresholds.
    lower_cut = np.percentile(energies, 33)
    upper_cut = np.percentile(energies, 66)

    def grade(value):
        if value <= lower_cut:
            return EnergyLevel.WEAK
        if value >= upper_cut:
            return EnergyLevel.STRONG
        return EnergyLevel.MEDIUM

    return [grade(value) for value in energies]
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
"""命令行接口"""
|
|
2
|
+
import json
|
|
3
|
+
import sys
|
|
4
|
+
import click
|
|
5
|
+
|
|
6
|
+
from .analyzer import analyze
|
|
7
|
+
from .errors import (
|
|
8
|
+
BeatDetectorError,
|
|
9
|
+
FileNotFoundError,
|
|
10
|
+
UnsupportedFormatError,
|
|
11
|
+
AnalysisError,
|
|
12
|
+
FFmpegRequiredError
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@click.command()
@click.argument('input_file', type=click.Path(exists=False))
@click.option('-o', '--output', type=click.Path(), help='Output file path')
@click.option('--fps', default=30, help='Frame rate (default: 30)')
@click.option('--log-level', default='info',
              type=click.Choice(['debug', 'info', 'warning', 'error']),
              help='Log level (default: info)')
@click.option('--pretty', is_flag=True, help='Pretty print JSON')
def main(input_file: str, output: str, fps: int, log_level: str, pretty: bool) -> None:
    """
    Music beat detector CLI

    Analyze audio file and output beat detection results in JSON format.

    Supported formats (no extra dependencies):
        .wav, .flac, .ogg

    Formats requiring ffmpeg:
        .mp3, .m4a, .aac, .wma

    Usage:
        beat-detector input.mp3 -o output.json
        beat-detector input.wav --pretty
    """
    # NOTE: the docstring above doubles as the command's help text, so it is
    # user-facing and kept verbatim.
    #
    # Exit statuses: 0 on success, 1 file not found, 2 unsupported format,
    # 3 analysis failure or unexpected error, 4 ffmpeg required.
    try:
        result = analyze(input_file, fps=fps, log_level=log_level)

        if output:
            result.save(output, pretty=pretty)
            click.echo(f"Output saved to: {output}")
        else:
            # Serialize only when the JSON is actually printed; previously it
            # was also built (and discarded) when writing to a file.
            click.echo(result.to_json(pretty=pretty))

    except (FileNotFoundError, UnsupportedFormatError,
            FFmpegRequiredError, AnalysisError) as e:
        # Each of these classes sets ``code`` to the exit status that used to
        # be hard-coded in a separate handler (1, 2, 4 and 3 respectively).
        click.echo(f"Error: {e.message}", err=True)
        sys.exit(e.code)

    except Exception as e:
        # Top-level boundary: report anything else and fail with status 3.
        click.echo(f"Unexpected error: {str(e)}", err=True)
        sys.exit(3)

    sys.exit(0)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
# Run the CLI when this module is executed directly.
if __name__ == '__main__':
    main()
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
"""错误类型"""
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class BeatDetectorError(Exception):
    """Base class for the beat detector's errors.

    Attributes:
        message: Human-readable description; also passed to ``Exception``.
        code: Integer error code attached to the error (defaults to 1).
    """

    def __init__(self, message: str, code: int = 1):
        super().__init__(message)
        self.message, self.code = message, code
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class FileNotFoundError(BeatDetectorError):
    """Raised when the input file does not exist (error code 1).

    NOTE: this class has the same name as the built-in ``FileNotFoundError``
    and shadows it in any module that imports it by name.
    """

    def __init__(self, path: str):
        description = f"File not found: {path}"
        super().__init__(description, code=1)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class UnsupportedFormatError(BeatDetectorError):
    """Raised for an audio format that is not supported (error code 2)."""

    def __init__(self, format: str):
        description = f"Unsupported audio format: {format}"
        super().__init__(description, code=2)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class FFmpegRequiredError(BeatDetectorError):
    """Raised when a format needs ffmpeg (error code 4)."""

    def __init__(self, format: str):
        # Multi-line, user-facing message: names the offending format, lists
        # per-platform install commands and points at the native formats.
        message = (
            f"Format '{format}' requires ffmpeg. "
            f"Please install ffmpeg:\n"
            f" Ubuntu/Debian: sudo apt install ffmpeg\n"
            f" macOS: brew install ffmpeg\n"
            f" Windows: choco install ffmpeg\n"
            f"\n"
            f"Or use native formats: .wav, .flac, .ogg"
        )
        super().__init__(message, code=4)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class AnalysisError(BeatDetectorError):
    """Raised when analysis fails (error code 3).

    Attributes:
        cause: The underlying exception, if one was supplied. It is only
            stored on the instance; it is not attached as ``__cause__``.
    """

    def __init__(self, message: str, cause: Optional[Exception] = None):
        super().__init__(message, code=3)
        self.cause = cause
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
"""结构检测模块"""
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from typing import List, Callable, Optional
|
|
4
|
+
import numpy as np
|
|
5
|
+
import librosa
|
|
6
|
+
|
|
7
|
+
from .utils import ms_to_frame
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
class Segment:
    """A section of the track."""
    # Section label; _detect_segments emits 'intro', 'verse' and 'outro'.
    type: str
    # Section start and end in milliseconds.
    start_ms: int
    end_ms: int
    # Start and end as frame numbers (ms_to_frame of the values above).
    start_frame: int
    end_frame: int
    # As filled in by _detect_segments: the section's mean RMS divided by the
    # highest section mean.
    confidence: float
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class EnergyPeak:
    """A local energy peak."""
    # Peak time in milliseconds.
    time_ms: int
    # Frame number for time_ms (via ms_to_frame).
    frame: int
    # As filled in by _detect_energy_peaks: RMS at the peak divided by the
    # maximum RMS of the track.
    intensity: float
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass
class SilenceRegion:
    """A stretch of silence."""
    # Region start and end in milliseconds.
    start_ms: int
    end_ms: int
    # Start and end as frame numbers (ms_to_frame of the values above).
    start_frame: int
    end_frame: int
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass
class StructureResult:
    """Result of structure detection."""
    # Sections of the track.
    segments: List[Segment]
    # Local energy peaks.
    energy_peaks: List[EnergyPeak]
    # Detected stretches of silence.
    silence_regions: List[SilenceRegion]
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def detect_structure(
    y: np.ndarray,
    sr: int,
    fps: int = 30,
    on_progress: Optional[Callable[[float], None]] = None
) -> StructureResult:
    """Detect energy peaks, silent regions and coarse sections of a track.

    Args:
        y: Audio samples.
        sr: Sample rate of ``y``.
        fps: Frame rate used to convert times into frame numbers.
        on_progress: Optional callback; it is called with 10, 30, 50, 70
            and 100 as the individual stages finish.

    Returns:
        A ``StructureResult`` bundling segments, energy peaks and silence
        regions.
    """
    def report(percent: float) -> None:
        # Progress reporting is optional; skip it when no callback is given.
        if on_progress:
            on_progress(percent)

    report(10.0)

    # Frame-wise RMS energy and the time of every RMS frame.
    rms = librosa.feature.rms(y=y)[0]
    rms_times = librosa.frames_to_time(range(len(rms)), sr=sr)
    report(30.0)

    # Energy peaks.
    peaks = _detect_energy_peaks(rms, rms_times, fps)
    report(50.0)

    # Silent regions.
    silences = _detect_silence_regions(rms, rms_times, fps, sr)
    report(70.0)

    # Simple, energy-based section detection.
    sections = _detect_segments(y, sr, rms, rms_times, fps)
    report(100.0)

    return StructureResult(
        segments=sections,
        energy_peaks=peaks,
        silence_regions=silences,
    )
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _detect_energy_peaks(rms: np.ndarray, times: np.ndarray, fps: int) -> List[EnergyPeak]:
    """Find local RMS maxima that stand out from the track's average energy.

    A frame is a peak when its RMS is strictly greater than both neighbours
    and greater than ``mean(rms) + std(rms)``. The first and last frames are
    never reported because they lack a neighbour on one side.

    Args:
        rms: Frame-wise RMS energy.
        times: Time in seconds of each RMS frame (same length as ``rms``).
        fps: Frame rate used to convert peak times into frame numbers.

    Returns:
        One ``EnergyPeak`` per detected peak, in time order. ``intensity`` is
        the peak's RMS divided by the maximum RMS.
    """
    # Fewer than three frames cannot contain an interior local maximum; this
    # also avoids taking statistics of an empty array.
    if len(rms) < 3:
        return []

    threshold = np.mean(rms) + np.std(rms)
    # Loop-invariant: previously recomputed twice for every detected peak.
    loudest = np.max(rms)

    peaks = []
    for i in range(1, len(rms) - 1):
        if rms[i] > threshold and rms[i] > rms[i - 1] and rms[i] > rms[i + 1]:
            time_ms = int(times[i] * 1000)
            intensity = float(rms[i] / loudest) if loudest > 0 else 0.0
            peaks.append(EnergyPeak(
                time_ms=time_ms,
                frame=ms_to_frame(time_ms, fps),
                intensity=intensity
            ))

    return peaks
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _detect_silence_regions(rms: np.ndarray, times: np.ndarray, fps: int, sr: int) -> List[SilenceRegion]:
    """Find stretches where the RMS stays below 10% of the track's mean RMS.

    A stretch is reported only when it spans at least 0.5 seconds. Silence
    that lasts until the end of the track is reported too, ending at the time
    of the last RMS frame; previously such a trailing stretch was dropped
    because a region was only closed when a louder frame followed it.

    Args:
        rms: Frame-wise RMS energy.
        times: Time in seconds of each RMS frame (same length as ``rms``).
        fps: Frame rate used to convert times into frame numbers.
        sr: Unused; kept so the signature stays unchanged for callers.

    Returns:
        The detected ``SilenceRegion`` objects in time order.
    """
    regions = []
    if len(rms) == 0:
        return regions

    threshold = np.mean(rms) * 0.1  # silence threshold
    min_silence_duration = 0.5  # minimum silence length in seconds

    def close_region(start_index, end_index):
        # Record the stretch [start_index, end_index] if it is long enough.
        if times[end_index] - times[start_index] < min_silence_duration:
            return
        start_ms = int(times[start_index] * 1000)
        end_ms = int(times[end_index] * 1000)
        regions.append(SilenceRegion(
            start_ms=start_ms,
            end_ms=end_ms,
            start_frame=ms_to_frame(start_ms, fps),
            end_frame=ms_to_frame(end_ms, fps)
        ))

    silence_start = None  # index of the first frame of the open stretch
    for i, energy in enumerate(rms):
        if energy < threshold:
            if silence_start is None:
                silence_start = i
        elif silence_start is not None:
            close_region(silence_start, i)
            silence_start = None

    # The track ended while still silent: close the open stretch at the last
    # frame instead of discarding it.
    if silence_start is not None:
        close_region(silence_start, len(rms) - 1)

    return regions
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _detect_segments(y: np.ndarray, sr: int, rms: np.ndarray, times: np.ndarray, fps: int) -> List[Segment]:
    """Split the track into three equal-length sections.

    The sections are labelled 'intro', 'verse' and 'outro' in that order. The
    last section absorbs the remainder of the integer division so that it ends
    exactly at the track duration. ``confidence`` is the section's mean RMS
    divided by the highest section mean (or by 1 when that mean is not
    positive).

    Args:
        y: Audio samples; only the length is used.
        sr: Sample rate of ``y``.
        rms: Frame-wise RMS energy.
        times: Not used by this function.
        fps: Frame rate used to convert times into frame numbers.

    Returns:
        Three ``Segment`` objects in time order.
    """
    total_ms = int(len(y) / sr * 1000)

    # Simple strategy: cut the audio into intro, main part and outro.
    labels = ['intro', 'verse', 'outro']
    count = 3
    span_ms = total_ms // count

    # Mean energy of every section.
    mean_energy = [np.mean(chunk) for chunk in np.array_split(rms, count)]

    # Normaliser for the relative-energy confidence value.
    top_energy = max(mean_energy) if max(mean_energy) > 0 else 1

    def build(index):
        begin_ms = index * span_ms
        is_last = index >= count - 1
        finish_ms = total_ms if is_last else (index + 1) * span_ms
        return Segment(
            type=labels[index],
            start_ms=begin_ms,
            end_ms=finish_ms,
            start_frame=ms_to_frame(begin_ms, fps),
            end_frame=ms_to_frame(finish_ms, fps),
            confidence=float(mean_energy[index] / top_energy),
        )

    return [build(index) for index in range(count)]
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"""工具函数"""
|
|
2
|
+
from typing import Tuple
|
|
3
|
+
import os
|
|
4
|
+
import logging
|
|
5
|
+
import numpy as np
|
|
6
|
+
|
|
7
|
+
from .errors import FileNotFoundError, UnsupportedFormatError, FFmpegRequiredError
|
|
8
|
+
|
|
9
|
+
# Natively supported formats (no extra dependencies required)
NATIVE_FORMATS = {'.wav', '.flac', '.ogg'}

# Formats that require ffmpeg
FFMPEG_FORMATS = {'.mp3', '.m4a', '.aac', '.wma'}

# All supported formats
SUPPORTED_FORMATS = NATIVE_FORMATS | FFMPEG_FORMATS
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def ms_to_frame(ms: int, fps: int) -> int:
    """Convert a time in milliseconds to a frame number at ``fps``.

    The fractional part of the frame position is discarded by ``int``.
    """
    exact_position = ms * fps / 1000
    return int(exact_position)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def frame_to_ms(frame: int, fps: int) -> int:
    """Convert a frame number at ``fps`` to a time in milliseconds.

    The fractional part of the millisecond value is discarded by ``int``.
    """
    exact_ms = frame * 1000 / fps
    return int(exact_ms)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def check_ffmpeg_available() -> bool:
    """Return True when ``shutil.which`` can locate an ``ffmpeg`` executable."""
    from shutil import which

    ffmpeg_path = which('ffmpeg')
    return ffmpeg_path is not None
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def load_audio(file_path: str) -> Tuple[np.ndarray, int]:
    """Validate and load an audio file.

    Natively supported: .wav, .flac, .ogg (no extra dependencies).
    Requires ffmpeg: .mp3, .m4a, .aac, .wma.

    Args:
        file_path: Path of the audio file to load.

    Returns:
        The ``(samples, sample_rate)`` pair returned by ``librosa.load``.

    Raises:
        FileNotFoundError: ``file_path`` is not an existing regular file.
        UnsupportedFormatError: The file extension is not a supported format.
        FFmpegRequiredError: The format needs ffmpeg and none was found.
    """
    # isfile rather than exists: a directory would otherwise pass this check
    # and only fail later inside the decoder.
    if not os.path.isfile(file_path):
        raise FileNotFoundError(file_path)

    # Format check (case-insensitive on the extension).
    ext = os.path.splitext(file_path)[1].lower()
    if ext not in SUPPORTED_FORMATS:
        raise UnsupportedFormatError(ext)

    # Some formats can only be decoded when ffmpeg is installed.
    if ext in FFMPEG_FORMATS and not check_ffmpeg_available():
        raise FFmpegRequiredError(ext)

    # Imported only after validation so the cheap checks above neither pay
    # for nor depend on the heavy librosa import.
    import librosa

    # sr=None asks librosa for the file's native sample rate (no resampling).
    y, sr = librosa.load(file_path, sr=None)
    return y, sr
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def setup_logging(level: str = "info") -> None:
    """Configure the root logger.

    Args:
        level: One of "debug", "info", "warning" or "error". Any other value
            falls back to INFO.

    The call uses ``force=True``, so handlers already attached to the root
    logger are replaced.
    """
    known_levels = {
        name: getattr(logging, name.upper())
        for name in ("debug", "info", "warning", "error")
    }
    chosen = known_levels[level] if level in known_levels else logging.INFO
    logging.basicConfig(
        level=chosen,
        format="[%(levelname)s] %(message)s",
        force=True,
    )
|