@rookiestar/eng-lang-tutor 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +22 -0
- package/.gitignore +32 -0
- package/CHANGELOG.md +37 -0
- package/CLAUDE.md +275 -0
- package/README.md +369 -0
- package/SKILL.md +613 -0
- package/bin/eng-lang-tutor.js +177 -0
- package/docs/OPENCLAW_DEPLOYMENT.md +241 -0
- package/examples/sample_keypoint_a1.json +112 -0
- package/examples/sample_keypoint_a2.json +124 -0
- package/examples/sample_keypoint_b1.json +135 -0
- package/examples/sample_keypoint_b2.json +137 -0
- package/examples/sample_keypoint_c1.json +134 -0
- package/examples/sample_keypoint_c2.json +141 -0
- package/examples/sample_quiz_a1.json +94 -0
- package/examples/sample_quiz_a2.json +94 -0
- package/examples/sample_quiz_b1.json +92 -0
- package/examples/sample_quiz_b2.json +94 -0
- package/examples/sample_quiz_c1.json +94 -0
- package/examples/sample_quiz_c2.json +104 -0
- package/package.json +41 -0
- package/references/resources.md +292 -0
- package/requirements.txt +16 -0
- package/scripts/__init__.py +28 -0
- package/scripts/audio/__init__.py +23 -0
- package/scripts/audio/composer.py +367 -0
- package/scripts/audio/converter.py +331 -0
- package/scripts/audio/feishu_voice.py +404 -0
- package/scripts/audio/tts/__init__.py +30 -0
- package/scripts/audio/tts/base.py +166 -0
- package/scripts/audio/tts/manager.py +306 -0
- package/scripts/audio/tts/providers/__init__.py +12 -0
- package/scripts/audio/tts/providers/edge.py +111 -0
- package/scripts/audio/tts/providers/xunfei.py +205 -0
- package/scripts/audio/utils.py +63 -0
- package/scripts/cli/__init__.py +7 -0
- package/scripts/cli/cli.py +229 -0
- package/scripts/cli/command_parser.py +336 -0
- package/scripts/core/__init__.py +30 -0
- package/scripts/core/constants.py +125 -0
- package/scripts/core/error_notebook.py +308 -0
- package/scripts/core/gamification.py +405 -0
- package/scripts/core/scorer.py +295 -0
- package/scripts/core/state_manager.py +814 -0
- package/scripts/eng-lang-tutor +16 -0
- package/scripts/scheduling/__init__.py +6 -0
- package/scripts/scheduling/cron_push.py +229 -0
- package/scripts/utils/__init__.py +12 -0
- package/scripts/utils/dedup.py +331 -0
- package/scripts/utils/helpers.py +82 -0
- package/templates/keypoint_schema.json +420 -0
- package/templates/prompt_templates.md +73 -0
- package/templates/prompts/display_guide.md +106 -0
- package/templates/prompts/initialization.md +350 -0
- package/templates/prompts/keypoint_generation.md +272 -0
- package/templates/prompts/output_rules.md +106 -0
- package/templates/prompts/quiz_generation.md +190 -0
- package/templates/prompts/responses.md +339 -0
- package/templates/prompts/shared_enums.md +252 -0
- package/templates/quiz_schema.json +214 -0
- package/templates/state_schema.json +277 -0
|
@@ -0,0 +1,367 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
知识点音频合成器 - 将多个音频片段合并为单个文件
|
|
4
|
+
|
|
5
|
+
音频结构:
|
|
6
|
+
- expressions: 引导语 [1s] 内容 [2s]
|
|
7
|
+
- alternatives: 引导语 [1s] 内容 [2s]
|
|
8
|
+
- dialogues: 引导语 [1s] 对话行1 [0.5s] 对话行2 ...
|
|
9
|
+
|
|
10
|
+
使用示例:
|
|
11
|
+
from scripts.audio.composer import AudioComposer
|
|
12
|
+
from scripts.audio.tts import TTSManager
|
|
13
|
+
|
|
14
|
+
tts = TTSManager.from_env()
|
|
15
|
+
composer = AudioComposer(tts)
|
|
16
|
+
|
|
17
|
+
result = composer.compose_keypoint_audio(keypoint, Path("output.mp3"))
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
import subprocess
|
|
21
|
+
import tempfile
|
|
22
|
+
import shutil
|
|
23
|
+
from pathlib import Path
|
|
24
|
+
from typing import Optional, List
|
|
25
|
+
from dataclasses import dataclass
|
|
26
|
+
|
|
27
|
+
from .tts import TTSManager
|
|
28
|
+
from .utils import get_ffmpeg_path, get_audio_duration
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@dataclass
class CompositionResult:
    """Outcome of one audio composition attempt."""
    # True when the composed audio file was produced, False otherwise.
    success: bool
    # Path of the composed audio file; left as None on failure.
    audio_path: Optional[Path] = None
    # Duration of the composed audio in seconds; left as None on failure.
    duration_seconds: Optional[float] = None
    # Human-readable failure reason; left as None on success.
    error_message: Optional[str] = None
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class AudioComposer:
    """
    Compose a single audio file for one knowledge point (keypoint).

    The expressions, alternatives and example dialogues of a keypoint are
    synthesized segment by segment through the TTS manager, separated by
    generated silence, and finally joined with ffmpeg's concat demuxer.

    Intermediate files are kept in a private temporary directory created in
    ``__init__``. Use the instance as a context manager so that directory is
    removed deterministically; ``__del__`` is only a best-effort fallback.
    """

    def __init__(
        self,
        tts_manager: TTSManager,
        ffmpeg_path: Optional[str] = None
    ):
        """
        Initialize the composer.

        Args:
            tts_manager: TTS manager instance used to synthesize speech.
            ffmpeg_path: Path of the ffmpeg executable (auto-detected through
                ``get_ffmpeg_path()`` when omitted).
        """
        self.tts = tts_manager
        self.ffmpeg_path = ffmpeg_path or get_ffmpeg_path()

        # Scratch directory holding every intermediate segment/silence file.
        self.temp_dir = Path(tempfile.mkdtemp(prefix="audio_composer_"))

    def __enter__(self):
        """Enter the context manager and return the composer itself."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Remove the temporary directory; exceptions are never suppressed."""
        self._cleanup()
        return False

    def _cleanup(self):
        """Delete the temporary directory if it was created and still exists."""
        # hasattr guards against __init__ failing before temp_dir was assigned.
        if hasattr(self, 'temp_dir') and self.temp_dir.exists():
            shutil.rmtree(self.temp_dir, ignore_errors=True)

    def __del__(self):
        """Fallback cleanup (finalizers are not guaranteed to run)."""
        self._cleanup()

    def compose_keypoint_audio(
        self,
        keypoint: dict,
        output_path: Path,
        lead_in_silence: float = 1.0,      # pause after a lead-in phrase
        section_silence: float = 2.0,      # pause after a section's content
        dialogue_silence: float = 0.5,     # pause between dialogue lines
        narrator_voice: str = "henry",     # narrator voice
        voice_a: str = "mary",             # voice for dialogue speaker A
        voice_b: str = "henry",            # voice for every other speaker
        speed: float = 0.9                 # speech rate
    ) -> CompositionResult:
        """
        Compose the audio for one keypoint.

        Layout of the result:
            - expressions:  lead-in, pause, joined phrases, pause
            - alternatives: lead-in, pause, joined alternatives, pause
            - dialogues:    lead-in, pause, then each "Speaker: text" line
              followed by a short pause

        Args:
            keypoint: Keypoint data; the keys ``expressions`` (dicts with a
                ``phrase``), ``alternatives`` (strings) and ``examples``
                (dicts with a ``dialogue`` list) are read.
            output_path: Destination file of the composed audio.
            lead_in_silence: Pause after each lead-in, in seconds.
            section_silence: Pause after each section's content, in seconds.
            dialogue_silence: Pause after each dialogue line, in seconds.
            narrator_voice: Voice used for lead-ins and narrated content.
            voice_a: Voice used for dialogue speaker "A" (case-insensitive).
            voice_b: Voice used for any other dialogue speaker.
            speed: Speech rate passed to the TTS manager.

        Returns:
            CompositionResult: ``success=True`` with path and duration, or
            ``success=False`` with an error message. Exceptions raised while
            composing are converted into a failed result, not propagated.
        """
        try:
            output_path = Path(output_path)
            output_path.parent.mkdir(parents=True, exist_ok=True)

            segments: List[Path] = []

            # 1. Expressions
            expressions = keypoint.get("expressions", [])
            if expressions:
                phrases = [expr.get("phrase", "") for expr in expressions]
                self._append_narrated_section(
                    segments,
                    lead_in_text="Key expressions",
                    content_text=". ".join(p for p in phrases if p),
                    voice=narrator_voice,
                    speed=speed,
                    lead_in_silence=lead_in_silence,
                    section_silence=section_silence
                )

            # 2. Alternatives
            alternatives = keypoint.get("alternatives", [])
            if alternatives:
                self._append_narrated_section(
                    segments,
                    lead_in_text="You can also say",
                    content_text=". ".join(alt for alt in alternatives if alt),
                    voice=narrator_voice,
                    speed=speed,
                    lead_in_silence=lead_in_silence,
                    section_silence=section_silence
                )

            # 3. Dialogues
            examples = keypoint.get("examples", [])
            if examples:
                self._append_dialogues(
                    segments,
                    examples=examples,
                    narrator_voice=narrator_voice,
                    voice_a=voice_a,
                    voice_b=voice_b,
                    speed=speed,
                    lead_in_silence=lead_in_silence,
                    dialogue_silence=dialogue_silence
                )

            if not segments:
                return CompositionResult(
                    success=False,
                    error_message="No audio content to compose"
                )

            # 4. Join all segments into the output file
            final_audio = self._concatenate_segments(segments, output_path)

            # 5. Measure the duration of the result
            duration = get_audio_duration(final_audio, self.ffmpeg_path)

            return CompositionResult(
                success=True,
                audio_path=final_audio,
                duration_seconds=duration
            )

        except Exception as e:
            # Public entry point: report every failure through the result.
            return CompositionResult(
                success=False,
                error_message=str(e)
            )

    def _append_silence(self, segments: List[Path], duration: float) -> None:
        """Append a pause of ``duration`` seconds; non-positive means no pause."""
        if duration <= 0:
            return
        segments.append(self._generate_silence(duration))

    def _append_narrated_section(
        self,
        segments: List[Path],
        lead_in_text: str,
        content_text: str,
        voice: str,
        speed: float,
        lead_in_silence: float,
        section_silence: float
    ) -> None:
        """Append lead-in, pause, optional narrated content and closing pause."""
        # len(segments) only grows, so it yields a unique temp-file index.
        segments.append(self._synthesize_segment(
            text=lead_in_text,
            voice=voice,
            speed=speed,
            index=len(segments)
        ))
        self._append_silence(segments, lead_in_silence)

        if content_text:
            segments.append(self._synthesize_segment(
                text=content_text,
                voice=voice,
                speed=speed,
                index=len(segments)
            ))

        self._append_silence(segments, section_silence)

    def _append_dialogues(
        self,
        segments: List[Path],
        examples: list,
        narrator_voice: str,
        voice_a: str,
        voice_b: str,
        speed: float,
        lead_in_silence: float,
        dialogue_silence: float
    ) -> None:
        """Append the "Dialogue" lead-in followed by every spoken dialogue line."""
        segments.append(self._synthesize_segment(
            text="Dialogue",
            voice=narrator_voice,
            speed=speed,
            index=len(segments)
        ))
        self._append_silence(segments, lead_in_silence)

        for example in examples:
            for line in example.get("dialogue", []):
                parsed = self._parse_dialogue_line(line)
                if parsed is None:
                    # No "Speaker: text" structure or nothing to say.
                    continue
                speaker, text = parsed

                # Speaker "A" gets voice_a; every other speaker gets voice_b.
                voice = voice_a if speaker.upper() == "A" else voice_b

                segments.append(self._synthesize_segment(
                    text=text,
                    voice=voice,
                    speed=speed,
                    index=len(segments)
                ))
                self._append_silence(segments, dialogue_silence)

    @staticmethod
    def _parse_dialogue_line(line: str) -> Optional[tuple]:
        """
        Split a ``"Speaker: text"`` line into ``(speaker, text)``.

        Only the first colon separates speaker from text, so colons inside
        the spoken text are preserved.

        Returns:
            ``(speaker, text)`` with surrounding whitespace stripped, or
            ``None`` when the line has no colon or the text part is empty.
        """
        if ":" not in line:
            return None
        speaker, text = line.split(":", 1)
        text = text.strip()
        if not text:
            return None
        return speaker.strip(), text

    def _synthesize_segment(
        self,
        text: str,
        voice: str,
        speed: float,
        index: int
    ) -> Path:
        """
        Synthesize one speech segment into the temporary directory.

        Args:
            text: Text to speak.
            voice: Voice name passed to the TTS manager.
            speed: Speech rate passed to the TTS manager.
            index: Segment index used to build a unique file name.

        Returns:
            Path of the synthesized audio file.

        Raises:
            RuntimeError: If the TTS manager reports a failure.
        """
        output_path = self.temp_dir / f"segment_{index}.mp3"

        result = self.tts.synthesize(
            text=text,
            output_path=output_path,
            voice=voice,
            speed=speed
        )

        if not result.success:
            raise RuntimeError(f"TTS synthesis failed: {result.error_message}")

        return output_path

    def _generate_silence(self, duration: float) -> Path:
        """
        Generate (or reuse) a silent audio file of the given length.

        Args:
            duration: Length in seconds.

        Returns:
            Path of the silent audio file.

        Raises:
            RuntimeError: If ffmpeg exits with a non-zero status.
        """
        output_path = self.temp_dir / f"silence_{duration}.mp3"

        # One file per distinct duration is enough; reuse it on later calls.
        if output_path.exists():
            return output_path

        # NOTE(review): silence is produced as 16 kHz mono while segments are
        # later joined with stream copy; confirm the TTS providers emit a
        # matching format.
        cmd = [
            self.ffmpeg_path,
            "-f", "lavfi",
            "-i", "anullsrc=r=16000:cl=mono",
            "-t", str(duration),
            "-y",
            str(output_path)
        ]

        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=30
        )

        if result.returncode != 0:
            raise RuntimeError(f"Failed to generate silence: {result.stderr}")

        return output_path

    def _concatenate_segments(
        self,
        segments: List[Path],
        output_path: Path
    ) -> Path:
        """
        Join audio segments, in order, into ``output_path``.

        Args:
            segments: Paths of the segments to join.
            output_path: Destination file.

        Returns:
            ``output_path``.

        Raises:
            RuntimeError: If ffmpeg exits with a non-zero status.
        """
        # Write the list file consumed by ffmpeg's concat demuxer. UTF-8 is
        # explicit so non-ASCII paths do not depend on the locale encoding.
        list_file = self.temp_dir / "concat_list.txt"
        with open(list_file, "w", encoding="utf-8") as f:
            for seg in segments:
                # Close the quote, emit an escaped quote, reopen the quote.
                escaped_path = str(seg).replace("'", "'\\''")
                f.write(f"file '{escaped_path}'\n")

        cmd = [
            self.ffmpeg_path,
            "-f", "concat",
            "-safe", "0",
            "-i", str(list_file),
            "-c", "copy",
            "-y",
            str(output_path)
        ]

        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=120
        )

        if result.returncode != 0:
            raise RuntimeError(f"Failed to concatenate audio: {result.stderr}")

        return output_path
|
|
@@ -0,0 +1,331 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
音频格式转换器 - 将 MP3 转换为飞书语音格式
|
|
4
|
+
|
|
5
|
+
飞书语音消息要求:
|
|
6
|
+
- 格式: Opus / Speex / AAC / AMR
|
|
7
|
+
- 采样率: 8000Hz / 16000Hz
|
|
8
|
+
- 声道: 单声道
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import subprocess
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Optional
|
|
14
|
+
from dataclasses import dataclass
|
|
15
|
+
|
|
16
|
+
from .utils import get_ffmpeg_path, get_audio_duration
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class ConversionResult:
    """Outcome of one audio format conversion attempt."""
    # True when ffmpeg produced the converted file, False otherwise.
    success: bool
    # Path of the converted file; left as None on failure.
    output_path: Optional[Path] = None
    # Human-readable failure reason; left as None on success.
    error_message: Optional[str] = None
    # Duration of the converted audio in seconds; left as None on failure.
    duration_seconds: Optional[float] = None
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class AudioConverter:
    """Convert audio files with ffmpeg into Feishu voice-message formats."""

    # Voice formats and sample rates accepted by convert_to_voice().
    SUPPORTED_FORMATS = ["opus", "speex", "aac", "amr"]
    SUPPORTED_SAMPLE_RATES = [8000, 16000]

    def __init__(self, ffmpeg_path: Optional[str] = None):
        """
        Initialize the converter.

        Args:
            ffmpeg_path: Path of the ffmpeg executable (auto-detected through
                ``get_ffmpeg_path()`` when omitted).
        """
        self.ffmpeg_path = ffmpeg_path or get_ffmpeg_path()

    def convert_to_voice(
        self,
        input_path: Path,
        output_path: Optional[Path] = None,
        format: str = "opus",
        sample_rate: int = 16000,
        bitrate: str = "24k"
    ) -> ConversionResult:
        """
        Convert an audio file into a Feishu voice format.

        Args:
            input_path: Source file (any format ffmpeg can read).
            output_path: Destination file; defaults to ``input_path`` with
                its suffix replaced by ``.{format}``.
            format: Target format, one of ``SUPPORTED_FORMATS``.
            sample_rate: Target sample rate, one of ``SUPPORTED_SAMPLE_RATES``.
            bitrate: Target bitrate in ffmpeg notation (default ``24k``).

        Returns:
            ConversionResult: success with path and duration, or a failure
            carrying an error message (invalid argument, missing input,
            ffmpeg error, timeout).
        """
        # Validate arguments before touching the file system.
        if format not in self.SUPPORTED_FORMATS:
            return ConversionResult(
                success=False,
                error_message=f"Unsupported format: {format}. Supported: {self.SUPPORTED_FORMATS}"
            )

        if sample_rate not in self.SUPPORTED_SAMPLE_RATES:
            return ConversionResult(
                success=False,
                error_message=f"Unsupported sample rate: {sample_rate}. Supported: {self.SUPPORTED_SAMPLE_RATES}"
            )

        input_path = Path(input_path)
        if not input_path.exists():
            return ConversionResult(
                success=False,
                error_message=f"Input file not found: {input_path}"
            )

        # Resolve the destination and make sure its directory exists.
        if output_path is None:
            output_path = input_path.with_suffix(f".{format}")
        else:
            output_path = Path(output_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)

        cmd = self._build_voice_command(
            input_path, output_path, format, sample_rate, bitrate
        )
        return self._run_ffmpeg(cmd, output_path)

    def _build_voice_command(
        self,
        input_path: Path,
        output_path: Path,
        format: str,
        sample_rate: int,
        bitrate: str
    ) -> list:
        """Build the ffmpeg argument list used by convert_to_voice()."""
        if format == "amr":
            # The narrow-band encoder handles 8 kHz, the wide-band one 16 kHz.
            codec = "libopencore_amrnb" if sample_rate == 8000 else "libvo_amrwbenc"
        else:
            codec = {
                "opus": "libopus",
                "speex": "libspeex",
                "aac": "aac"
            }[format]

        cmd = [
            self.ffmpeg_path,
            "-i", str(input_path),        # input file
            "-c:a", codec,                # audio encoder
            "-ar", str(sample_rate),      # sample rate
            "-ac", "1",                   # mono
            "-b:a", bitrate               # bitrate
        ]

        # Encoder-specific options must precede the output path; ffmpeg
        # options placed after the last output file are not applied to it.
        if format == "opus":
            # NOTE(review): "voip" is libopus' speech-tuned mode; confirm
            # whether "audio" is intended for voice messages.
            cmd.extend(["-application", "audio"])
        elif format == "speex":
            # NOTE(review): confirm ffmpeg can pick a muxer for the default
            # ".speex" suffix in the target environment.
            cmd.extend(["-compression_level", "10"])

        cmd.extend(["-y", str(output_path)])  # overwrite existing output
        return cmd

    def _run_ffmpeg(
        self,
        cmd: list,
        output_path: Path,
        timeout: int = 60
    ) -> ConversionResult:
        """Run an ffmpeg command and translate its outcome into a ConversionResult."""
        try:
            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=timeout
            )

            if result.returncode != 0:
                return ConversionResult(
                    success=False,
                    error_message=f"ffmpeg error: {result.stderr}"
                )

            # Measure the duration of the freshly written file.
            duration = get_audio_duration(output_path, self.ffmpeg_path)

            return ConversionResult(
                success=True,
                output_path=output_path,
                duration_seconds=duration
            )

        except subprocess.TimeoutExpired:
            return ConversionResult(
                success=False,
                error_message=f"Conversion timeout (>{timeout}s)"
            )
        except Exception as e:
            # E.g. the ffmpeg executable is missing; report, do not raise.
            return ConversionResult(
                success=False,
                error_message=str(e)
            )

    def batch_convert(
        self,
        input_dir: Path,
        output_dir: Optional[Path] = None,
        format: str = "opus",
        sample_rate: int = 16000
    ) -> dict:
        """
        Convert every audio file found directly inside a directory.

        Args:
            input_dir: Directory to scan (not recursive).
            output_dir: Destination directory; defaults to a ``voice``
                sub-directory of ``input_dir``.
            format: Target format passed to convert_to_voice().
            sample_rate: Target sample rate passed to convert_to_voice().

        Returns:
            Dict mapping each source file name to its ConversionResult.
        """
        input_dir = Path(input_dir)
        if output_dir is None:
            output_dir = input_dir / "voice"
        else:
            output_dir = Path(output_dir)

        results = {}

        # Source suffixes that are picked up (compared case-insensitively).
        input_extensions = [".mp3", ".wav", ".m4a", ".flac", ".ogg"]

        for input_file in input_dir.glob("*"):
            if input_file.suffix.lower() not in input_extensions:
                continue

            output_file = output_dir / input_file.with_suffix(f".{format}").name
            results[input_file.name] = self.convert_to_voice(
                input_path=input_file,
                output_path=output_file,
                format=format,
                sample_rate=sample_rate
            )

        return results

    def convert_to_feishu_voice(
        self,
        input_path: Path,
        output_path: Optional[Path] = None,
        sample_rate: int = 16000,
        bitrate: str = "24k"
    ) -> ConversionResult:
        """
        Convert audio into an Opus stream stored in an ``.m4a`` file.

        The audio is encoded with libopus as mono and written to the given
        output path (``.m4a`` by default).
        NOTE(review): the original documentation states that the Feishu
        plugin probes the file header and shows a voice bubble for this
        combination, while other platforms play it as a regular attachment;
        that behaviour is not verifiable from this module.

        Args:
            input_path: Source file.
            output_path: Destination file; defaults to ``input_path`` with
                its suffix replaced by ``.m4a``.
            sample_rate: Target sample rate (default 16000).
            bitrate: Target bitrate in ffmpeg notation (default ``24k``).

        Returns:
            ConversionResult: success with path and duration, or a failure
            carrying an error message.
        """
        input_path = Path(input_path)
        if not input_path.exists():
            return ConversionResult(
                success=False,
                error_message=f"Input file not found: {input_path}"
            )

        # Resolve the destination and make sure its directory exists.
        if output_path is None:
            output_path = input_path.with_suffix(".m4a")
        else:
            output_path = Path(output_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)

        cmd = [
            self.ffmpeg_path,
            "-i", str(input_path),
            "-c:a", "libopus",            # Opus encoder
            "-ar", str(sample_rate),      # sample rate
            "-ac", "1",                   # mono
            "-b:a", bitrate,              # bitrate
            "-y",                         # overwrite existing output
            str(output_path)
        ]

        return self._run_ffmpeg(cmd, output_path)
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
# 便捷函数
|
|
290
|
+
def convert_mp3_to_opus(
    input_path: Path,
    output_path: Optional[Path] = None
) -> ConversionResult:
    """
    Convert an MP3 file to 16 kHz Opus with a throw-away AudioConverter.

    Args:
        input_path: Path of the MP3 file.
        output_path: Destination path (optional).

    Returns:
        ConversionResult
    """
    options = {
        "input_path": input_path,
        "output_path": output_path,
        "format": "opus",
        "sample_rate": 16000,
    }
    return AudioConverter().convert_to_voice(**options)
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
def convert_to_feishu_voice(
    input_path: Path,
    output_path: Optional[Path] = None
) -> ConversionResult:
    """
    Convert audio to the Feishu voice-bubble format (.m4a + libopus).

    Delegates to ``AudioConverter.convert_to_feishu_voice`` on a fresh
    converter instance, using that method's default sample rate and bitrate.

    Args:
        input_path: Source file path.
        output_path: Destination path (optional, defaults to .m4a).

    Returns:
        ConversionResult
    """
    one_shot = AudioConverter()
    outcome = one_shot.convert_to_feishu_voice(
        input_path=input_path,
        output_path=output_path
    )
    return outcome
|