@rookiestar/eng-lang-tutor 1.0.3 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of @rookiestar/eng-lang-tutor might be problematic. Click here for more details.
- package/package.json +1 -1
- package/requirements.txt +2 -0
- package/scripts/__pycache__/audio_composer.cpython-313.pyc +0 -0
- package/scripts/__pycache__/audio_converter.cpython-313.pyc +0 -0
- package/scripts/__pycache__/audio_enhancer.cpython-313.pyc +0 -0
- package/scripts/__pycache__/state_manager.cpython-313.pyc +0 -0
- package/scripts/audio_composer.py +389 -0
- package/scripts/audio_converter.py +245 -0
- package/scripts/cli.py +21 -1
- package/scripts/feishu_voice.py +421 -0
- package/scripts/state_manager.py +90 -1
- package/scripts/tts/__pycache__/manager.cpython-313.pyc +0 -0
- package/scripts/tts/manager.py +14 -4
- package/scripts/tts/providers/__pycache__/xunfei.cpython-313.pyc +0 -0
- package/scripts/tts/providers/xunfei.py +10 -1
- package/scripts/__pycache__/cli.cpython-313.pyc +0 -0
- package/scripts/__pycache__/command_parser.cpython-313.pyc +0 -0
- package/scripts/__pycache__/constants.cpython-313.pyc +0 -0
- package/scripts/__pycache__/cron_push.cpython-313.pyc +0 -0
- package/scripts/__pycache__/dedup.cpython-313.pyc +0 -0
- package/scripts/__pycache__/gamification.cpython-313.pyc +0 -0
- package/scripts/__pycache__/scorer.cpython-313.pyc +0 -0
package/package.json
CHANGED
package/requirements.txt
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# Core dependencies
|
|
2
2
|
websocket-client>=1.6.0 # For XunFei TTS WebSocket API
|
|
3
|
+
certifi>=2024.0.0 # SSL certificate bundle for HTTPS/WebSocket connections
|
|
4
|
+
aiohttp>=3.8.0 # Async HTTP client for Feishu API
|
|
3
5
|
|
|
4
6
|
# Optional: Edge-TTS support (uncomment if needed)
|
|
5
7
|
# edge-tts>=6.1.0
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,389 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
知识点音频合成器 - 将多个音频片段合并为单个文件
|
|
4
|
+
|
|
5
|
+
音频结构:
|
|
6
|
+
- expressions: 引导语 [1s] 内容 [2s]
|
|
7
|
+
- alternatives: 引导语 [1s] 内容 [2s]
|
|
8
|
+
- dialogues: 引导语 [1s] 对话行1 [0.5s] 对话行2 ...
|
|
9
|
+
|
|
10
|
+
使用示例:
|
|
11
|
+
from scripts.audio_composer import AudioComposer
|
|
12
|
+
from scripts.tts import TTSManager
|
|
13
|
+
|
|
14
|
+
tts = TTSManager.from_env()
|
|
15
|
+
composer = AudioComposer(tts)
|
|
16
|
+
|
|
17
|
+
result = composer.compose_keypoint_audio(keypoint, Path("output.mp3"))
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
import subprocess
|
|
21
|
+
import tempfile
|
|
22
|
+
import shutil
|
|
23
|
+
from pathlib import Path
|
|
24
|
+
from typing import Optional, List
|
|
25
|
+
from dataclasses import dataclass
|
|
26
|
+
|
|
27
|
+
try:
|
|
28
|
+
from .tts import TTSManager
|
|
29
|
+
except ImportError:
|
|
30
|
+
from tts import TTSManager
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
class CompositionResult:
    """Outcome of composing a keypoint audio file.

    Attributes:
        success: True when composition finished without error.
        audio_path: Location of the final audio file (set on success).
        duration_seconds: Length of the produced audio (set on success).
        error_message: Human-readable failure reason (set on failure).
    """
    success: bool
    audio_path: Optional[Path] = None
    duration_seconds: Optional[float] = None
    error_message: Optional[str] = None
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class AudioComposer:
    """
    Keypoint audio composer.

    Merges a keypoint's expressions, alternatives and example dialogues into
    a single audio file with this layout:

        - expressions:  lead-in [1s] content [2s]
        - alternatives: lead-in [1s] content [2s]
        - dialogues:    lead-in [1s] line 1 [0.5s] line 2 ...

    Segments are synthesized with the injected TTSManager and stitched
    together with ffmpeg's concat demuxer.
    """

    def __init__(
        self,
        tts_manager: TTSManager,
        ffmpeg_path: Optional[str] = None
    ):
        """
        Initialize the composer.

        Args:
            tts_manager: TTS manager used to synthesize each segment.
            ffmpeg_path: Path to the ffmpeg executable (auto-detected when None).

        Raises:
            RuntimeError: If ffmpeg cannot be located.
        """
        self.tts = tts_manager
        self.ffmpeg_path = ffmpeg_path or shutil.which("ffmpeg")
        if not self.ffmpeg_path:
            raise RuntimeError(
                "ffmpeg not found. Install it with: brew install ffmpeg (macOS) "
                "or apt-get install ffmpeg (Ubuntu)"
            )

        # Scratch directory for intermediate segment/silence files.
        self.temp_dir = Path(tempfile.mkdtemp(prefix="audio_composer_"))

    def cleanup(self) -> None:
        """Remove the scratch directory. Idempotent; safe even if __init__ failed."""
        temp_dir = getattr(self, "temp_dir", None)
        if temp_dir is not None and temp_dir.exists():
            shutil.rmtree(temp_dir, ignore_errors=True)

    # Context-manager support lets callers release the scratch directory
    # deterministically instead of relying on __del__, which the interpreter
    # does not guarantee to run (e.g. at shutdown or in reference cycles).
    def __enter__(self) -> "AudioComposer":
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        self.cleanup()

    def __del__(self):
        """Best-effort cleanup of the scratch directory."""
        self.cleanup()

    def compose_keypoint_audio(
        self,
        keypoint: dict,
        output_path: Path,
        lead_in_silence: float = 1.0,   # pause after each lead-in
        section_silence: float = 2.0,   # pause after each section's content
        dialogue_silence: float = 0.5,  # pause between dialogue lines
        narrator_voice: str = "henry",  # narrator voice
        voice_a: str = "mary",          # voice for dialogue speaker "A"
        voice_b: str = "henry",         # voice for every other speaker
        speed: float = 0.9              # speech rate
    ) -> CompositionResult:
        """
        Compose the full keypoint audio.

        Args:
            keypoint: Keypoint data; reads the "expressions", "alternatives"
                and "examples" keys (all optional).
            output_path: Destination file path.
            lead_in_silence: Seconds of silence after each lead-in.
            section_silence: Seconds of silence after each section's content.
            dialogue_silence: Seconds of silence between dialogue lines.
            narrator_voice: Narrator voice name.
            voice_a: Voice for speaker "A" in dialogues.
            voice_b: Voice for all other dialogue speakers.
            speed: Speech rate passed through to the TTS engine.

        Returns:
            CompositionResult describing success/failure; never raises.
        """
        try:
            output_path = Path(output_path)
            output_path.parent.mkdir(parents=True, exist_ok=True)

            segments: List[Path] = []
            segment_index = 0

            # 1. Expressions section.
            expressions = keypoint.get("expressions", [])
            if expressions:
                phrases = [expr.get("phrase", "") for expr in expressions]
                segment_index = self._append_section(
                    segments,
                    segment_index,
                    lead_in_text="Key expressions",
                    content_text=". ".join(p for p in phrases if p),
                    voice=narrator_voice,
                    speed=speed,
                    lead_in_silence=lead_in_silence,
                    section_silence=section_silence,
                )

            # 2. Alternatives section.
            alternatives = keypoint.get("alternatives", [])
            if alternatives:
                segment_index = self._append_section(
                    segments,
                    segment_index,
                    lead_in_text="You can also say",
                    content_text=". ".join(alt for alt in alternatives if alt),
                    voice=narrator_voice,
                    speed=speed,
                    lead_in_silence=lead_in_silence,
                    section_silence=section_silence,
                )

            # 3. Dialogues section.
            examples = keypoint.get("examples", [])
            if examples:
                lead_in = self._synthesize_segment(
                    text="Dialogue",
                    voice=narrator_voice,
                    speed=speed,
                    index=segment_index
                )
                segments.append(lead_in)
                segment_index += 1
                segments.append(self._generate_silence(lead_in_silence))

                line_pause = self._generate_silence(dialogue_silence)
                for example in examples:
                    for line in example.get("dialogue", []):
                        # Lines are "Speaker: text"; skip anything else.
                        if ":" not in line:
                            continue
                        speaker, text = line.split(":", 1)
                        speaker = speaker.strip()
                        text = text.strip()
                        if not text:
                            continue

                        # Speaker "A" gets voice_a; everyone else voice_b.
                        voice = voice_a if speaker.upper() == "A" else voice_b
                        segments.append(self._synthesize_segment(
                            text=text,
                            voice=voice,
                            speed=speed,
                            index=segment_index
                        ))
                        segment_index += 1
                        segments.append(line_pause)

            if not segments:
                return CompositionResult(
                    success=False,
                    error_message="No audio content to compose"
                )

            # 4. Stitch all segments, then probe the final duration.
            final_audio = self._concatenate_segments(segments, output_path)
            duration = self._get_duration(final_audio)

            return CompositionResult(
                success=True,
                audio_path=final_audio,
                duration_seconds=duration
            )

        except Exception as e:
            return CompositionResult(success=False, error_message=str(e))

    def _append_section(
        self,
        segments: List[Path],
        segment_index: int,
        lead_in_text: str,
        content_text: str,
        voice: str,
        speed: float,
        lead_in_silence: float,
        section_silence: float,
    ) -> int:
        """Append "lead-in [pause] content [pause]" to *segments*.

        Returns the next free segment index. The content segment is skipped
        when *content_text* is empty; the trailing pause is always appended.
        """
        segments.append(self._synthesize_segment(
            text=lead_in_text, voice=voice, speed=speed, index=segment_index))
        segment_index += 1
        segments.append(self._generate_silence(lead_in_silence))

        if content_text:
            segments.append(self._synthesize_segment(
                text=content_text, voice=voice, speed=speed, index=segment_index))
            segment_index += 1

        segments.append(self._generate_silence(section_silence))
        return segment_index

    def _synthesize_segment(
        self,
        text: str,
        voice: str,
        speed: float,
        index: int
    ) -> Path:
        """
        Synthesize one audio segment into the scratch directory.

        Args:
            text: Text to speak.
            voice: Voice name.
            speed: Speech rate.
            index: Segment index (used for the temp filename).

        Returns:
            Path of the rendered MP3 segment.

        Raises:
            RuntimeError: If the TTS engine reports failure.
        """
        output_path = self.temp_dir / f"segment_{index}.mp3"

        result = self.tts.synthesize(
            text=text,
            output_path=output_path,
            voice=voice,
            speed=speed
        )
        if not result.success:
            raise RuntimeError(f"TTS synthesis failed: {result.error_message}")

        return output_path

    def _generate_silence(self, duration: float) -> Path:
        """
        Generate (and cache) a silent MP3 clip.

        Args:
            duration: Silence length in seconds.

        Returns:
            Path of the silent clip; reused on repeat calls for the same duration.

        Raises:
            RuntimeError: If ffmpeg fails.
        """
        output_path = self.temp_dir / f"silence_{duration}.mp3"
        if output_path.exists():
            # Already rendered this duration earlier in the run.
            return output_path

        cmd = [
            self.ffmpeg_path,
            "-f", "lavfi",
            # 16 kHz mono null (silent) source.
            "-i", "anullsrc=r=16000:cl=mono",
            "-t", str(duration),
            "-y",
            str(output_path)
        ]
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
        if result.returncode != 0:
            raise RuntimeError(f"Failed to generate silence: {result.stderr}")

        return output_path

    def _concatenate_segments(
        self,
        segments: List[Path],
        output_path: Path
    ) -> Path:
        """
        Concatenate audio segments with ffmpeg's concat demuxer.

        Args:
            segments: Segment paths, in playback order.
            output_path: Destination file.

        Returns:
            *output_path* on success.

        Raises:
            RuntimeError: If ffmpeg fails.
        """
        # The concat demuxer reads paths from a list file; single quotes in a
        # path must be escaped.
        list_file = self.temp_dir / "concat_list.txt"
        with open(list_file, "w") as f:
            for seg in segments:
                escaped_path = str(seg).replace("'", "'\\''")
                f.write(f"file '{escaped_path}'\n")

        # NOTE(review): "-c copy" assumes every segment shares identical codec
        # parameters (codec, sample rate, channels). TTS output and generated
        # silence are both expected to be 16 kHz mono MP3 -- confirm; if they
        # differ the output may have timing glitches and re-encoding is needed.
        cmd = [
            self.ffmpeg_path,
            "-f", "concat",
            "-safe", "0",
            "-i", str(list_file),
            "-c", "copy",
            "-y",
            str(output_path)
        ]
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
        if result.returncode != 0:
            raise RuntimeError(f"Failed to concatenate audio: {result.stderr}")

        return output_path

    def _get_duration(self, audio_path: Path) -> float:
        """
        Probe an audio file's duration by parsing ffmpeg's stderr banner.

        Args:
            audio_path: File to probe.

        Returns:
            Duration in seconds, or 0.0 if it cannot be parsed.
        """
        import re

        cmd = [
            self.ffmpeg_path,
            "-i", str(audio_path),
            "-hide_banner",
            "-f", "null",
            "-"
        ]
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)

        # stderr contains a line like "  Duration: 00:00:03.45, ...".
        match = re.search(r"Duration: (\d+):(\d+):(\d+\.?\d*)", result.stderr)
        if match:
            hours, minutes, seconds = match.groups()
            return int(hours) * 3600 + int(minutes) * 60 + float(seconds)
        return 0.0
|
@@ -0,0 +1,245 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
音频格式转换器 - 将 MP3 转换为飞书语音格式
|
|
4
|
+
|
|
5
|
+
飞书语音消息要求:
|
|
6
|
+
- 格式: Opus / Speex / AAC / AMR
|
|
7
|
+
- 采样率: 8000Hz / 16000Hz
|
|
8
|
+
- 声道: 单声道
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import subprocess
|
|
12
|
+
import shutil
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Optional
|
|
15
|
+
from dataclasses import dataclass
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class ConversionResult:
    """Outcome of one audio-format conversion.

    Attributes:
        success: True when ffmpeg completed without error.
        output_path: Path of the converted file (set on success).
        error_message: Human-readable failure reason (set on failure).
        duration_seconds: Length of the converted audio (set on success).
    """
    success: bool
    output_path: Optional[Path] = None
    error_message: Optional[str] = None
    duration_seconds: Optional[float] = None
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class AudioConverter:
    """Audio format converter for Feishu voice messages.

    Feishu voice messages accept Opus/Speex/AAC/AMR at 8 kHz or 16 kHz mono;
    this class wraps ffmpeg to produce compliant files.
    """

    # Voice formats Feishu accepts.
    SUPPORTED_FORMATS = ["opus", "speex", "aac", "amr"]
    SUPPORTED_SAMPLE_RATES = [8000, 16000]

    def __init__(self, ffmpeg_path: Optional[str] = None):
        """
        Initialize the converter.

        Args:
            ffmpeg_path: Path to the ffmpeg executable (auto-detected when None).

        Raises:
            RuntimeError: If ffmpeg cannot be located.
        """
        self.ffmpeg_path = ffmpeg_path or shutil.which("ffmpeg")
        if not self.ffmpeg_path:
            raise RuntimeError(
                "ffmpeg not found. Install it with: brew install ffmpeg (macOS) "
                "or apt-get install ffmpeg (Ubuntu)"
            )

    def convert_to_voice(
        self,
        input_path: Path,
        output_path: Optional[Path] = None,
        format: str = "opus",
        sample_rate: int = 16000,
        bitrate: str = "24k"
    ) -> ConversionResult:
        """
        Convert an audio file to a Feishu-compatible voice format.

        Args:
            input_path: Source file (MP3, WAV, M4A, ...).
            output_path: Destination path (defaults to the source path with
                the new extension).
            format: Target format (opus, speex, aac, amr).
            sample_rate: Target sample rate (8000 or 16000).
            bitrate: Target bitrate (default "24k", suited for speech).

        Returns:
            ConversionResult describing success/failure; never raises.
        """
        # Validate parameters before touching the filesystem.
        if format not in self.SUPPORTED_FORMATS:
            return ConversionResult(
                success=False,
                error_message=f"Unsupported format: {format}. Supported: {self.SUPPORTED_FORMATS}"
            )

        if sample_rate not in self.SUPPORTED_SAMPLE_RATES:
            return ConversionResult(
                success=False,
                error_message=f"Unsupported sample rate: {sample_rate}. Supported: {self.SUPPORTED_SAMPLE_RATES}"
            )

        input_path = Path(input_path)
        if not input_path.exists():
            return ConversionResult(
                success=False,
                error_message=f"Input file not found: {input_path}"
            )

        # Resolve the output path and make sure its directory exists.
        if output_path is None:
            output_path = input_path.with_suffix(f".{format}")
        else:
            output_path = Path(output_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)

        # NOTE(review): libvo_amrwbenc is an AMR-WB encoder, which expects a
        # 16 kHz input -- confirm whether "amr" at 8000 Hz actually works here.
        codec_map = {
            "opus": "libopus",
            "speex": "libspeex",
            "aac": "aac",
            "amr": "libvo_amrwbenc"
        }

        cmd = [
            self.ffmpeg_path,
            "-i", str(input_path),          # input file
            "-acodec", codec_map[format],   # encoder
            "-ar", str(sample_rate),        # sample rate
            "-ac", "1",                     # mono
            "-ab", bitrate,                 # bitrate
        ]

        # BUG FIX: these codec-specific options were previously appended
        # AFTER the output filename, where ffmpeg does not treat them as
        # output options (the command fails / they are ignored). Output
        # options must precede the output file.
        if format == "opus":
            # Tune libopus for full-band audio content.
            cmd.extend(["-application", "audio"])
        elif format == "speex":
            # Maximum Speex compression effort.
            cmd.extend(["-compression_level", "10"])

        cmd.extend([
            "-y",               # overwrite existing output
            str(output_path)
        ])

        try:
            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=60  # guard against runaway conversions
            )

            if result.returncode != 0:
                return ConversionResult(
                    success=False,
                    error_message=f"ffmpeg error: {result.stderr}"
                )

            # Probe the converted file for its duration.
            duration = self._get_duration(output_path)

            return ConversionResult(
                success=True,
                output_path=output_path,
                duration_seconds=duration
            )

        except subprocess.TimeoutExpired:
            return ConversionResult(
                success=False,
                error_message="Conversion timeout (>60s)"
            )
        except Exception as e:
            return ConversionResult(success=False, error_message=str(e))

    def _get_duration(self, audio_path: Path) -> float:
        """Return the audio duration in seconds (0.0 when unparsable)."""
        import re

        cmd = [
            self.ffmpeg_path,
            "-i", str(audio_path),
            "-hide_banner",
            "-f", "null",
            "-"
        ]
        result = subprocess.run(cmd, capture_output=True, text=True)

        # stderr contains a line like "  Duration: 00:00:03.45, ...".
        match = re.search(r"Duration: (\d+):(\d+):(\d+\.?\d*)", result.stderr)
        if match:
            hours, minutes, seconds = match.groups()
            return int(hours) * 3600 + int(minutes) * 60 + float(seconds)
        return 0.0

    def batch_convert(
        self,
        input_dir: Path,
        output_dir: Optional[Path] = None,
        format: str = "opus",
        sample_rate: int = 16000
    ) -> dict:
        """
        Convert every supported audio file in a directory.

        Args:
            input_dir: Directory to scan (non-recursive).
            output_dir: Destination directory (defaults to input_dir/voice).
            format: Target format.
            sample_rate: Target sample rate.

        Returns:
            Mapping of source filename -> ConversionResult.
        """
        input_dir = Path(input_dir)
        if output_dir is None:
            output_dir = input_dir / "voice"
        else:
            output_dir = Path(output_dir)

        results = {}

        # Input extensions we attempt to convert.
        input_extensions = [".mp3", ".wav", ".m4a", ".flac", ".ogg"]

        for input_file in input_dir.glob("*"):
            if input_file.suffix.lower() not in input_extensions:
                continue

            output_file = output_dir / input_file.with_suffix(f".{format}").name
            results[input_file.name] = self.convert_to_voice(
                input_path=input_file,
                output_path=output_file,
                format=format,
                sample_rate=sample_rate
            )

        return results
|
222
|
+
|
|
223
|
+
|
|
224
|
+
# 便捷函数
|
|
225
|
+
def convert_mp3_to_opus(
    input_path: Path,
    output_path: Optional[Path] = None
) -> ConversionResult:
    """Convert an MP3 file to 16 kHz Opus (Feishu's recommended voice format).

    Args:
        input_path: MP3 file to convert.
        output_path: Destination path (optional; defaults next to the input).

    Returns:
        ConversionResult from the underlying converter.
    """
    return AudioConverter().convert_to_voice(
        input_path=input_path,
        output_path=output_path,
        format="opus",
        sample_rate=16000,
    )
package/scripts/cli.py
CHANGED
|
@@ -20,7 +20,8 @@ def main():
|
|
|
20
20
|
help='Data directory path (default: ~/.openclaw/state/eng-lang-tutor or OPENCLAW_STATE_DIR env)')
|
|
21
21
|
parser.add_argument('command', nargs='?',
|
|
22
22
|
choices=['show', 'backup', 'save_daily', 'record_view',
|
|
23
|
-
'stats', 'config', 'errors', 'schedule'
|
|
23
|
+
'stats', 'config', 'errors', 'schedule',
|
|
24
|
+
'generate_audio'],
|
|
24
25
|
help='Command to execute')
|
|
25
26
|
parser.add_argument('--content-type', help='Content type for save_daily (keypoint, quiz)')
|
|
26
27
|
parser.add_argument('--content', help='JSON content for save_daily')
|
|
@@ -198,6 +199,25 @@ def main():
|
|
|
198
199
|
sm.save_state(state)
|
|
199
200
|
print(f"Schedule updated: keypoint at {new_keypoint}, quiz at {new_quiz}")
|
|
200
201
|
|
|
202
|
+
elif args.command == 'generate_audio':
|
|
203
|
+
"""Generate audio for a keypoint."""
|
|
204
|
+
target_date = None
|
|
205
|
+
if args.date:
|
|
206
|
+
try:
|
|
207
|
+
target_date = datetime.strptime(args.date, '%Y-%m-%d').date()
|
|
208
|
+
except ValueError:
|
|
209
|
+
print("Error: Invalid date format. Use YYYY-MM-DD")
|
|
210
|
+
exit(1)
|
|
211
|
+
|
|
212
|
+
result = sm.generate_keypoint_audio(target_date)
|
|
213
|
+
|
|
214
|
+
if result.get('success'):
|
|
215
|
+
print(f"Audio generated: {result.get('audio_path')}")
|
|
216
|
+
print(f"Duration: {result.get('duration_seconds', 0):.1f} seconds")
|
|
217
|
+
else:
|
|
218
|
+
print(f"Failed to generate audio: {result.get('error_message')}")
|
|
219
|
+
exit(1)
|
|
220
|
+
|
|
201
221
|
|
|
202
222
|
if __name__ == "__main__":
|
|
203
223
|
main()
|
|
@@ -0,0 +1,421 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
飞书语音消息发送模块
|
|
4
|
+
|
|
5
|
+
将 TTS 生成的音频转换为飞书语音格式并发送。
|
|
6
|
+
|
|
7
|
+
使用示例:
|
|
8
|
+
from scripts.feishu_voice import FeishuVoiceSender
|
|
9
|
+
|
|
10
|
+
sender = FeishuVoiceSender(app_id="xxx", app_secret="xxx")
|
|
11
|
+
|
|
12
|
+
# 发送单条语音
|
|
13
|
+
await sender.send_voice(
|
|
14
|
+
receive_id="ou_xxx",
|
|
15
|
+
text="Hello, nice to meet you!"
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
# 发送知识点音频
|
|
19
|
+
await sender.send_keypoint_voices(
|
|
20
|
+
receive_id="ou_xxx",
|
|
21
|
+
keypoint=keypoint,
|
|
22
|
+
audio_info=audio_info
|
|
23
|
+
)
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
import os
|
|
27
|
+
import asyncio
|
|
28
|
+
import aiohttp
|
|
29
|
+
from pathlib import Path
|
|
30
|
+
from typing import Optional, Dict, Any, List
|
|
31
|
+
from dataclasses import dataclass
|
|
32
|
+
|
|
33
|
+
from .audio_converter import AudioConverter, ConversionResult
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclass
class VoiceSendResult:
    """Outcome of sending one voice message.

    Attributes:
        success: True when the message was sent.
        message_id: Feishu message ID (set on success).
        error_message: Human-readable failure reason (set on failure).
    """
    success: bool
    message_id: Optional[str] = None
    error_message: Optional[str] = None
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class FeishuVoiceSender:
|
|
45
|
+
"""
|
|
46
|
+
飞书语音消息发送器
|
|
47
|
+
|
|
48
|
+
工作流程:
|
|
49
|
+
1. TTS 生成 MP3 音频
|
|
50
|
+
2. 转换为 Opus 格式(飞书推荐)
|
|
51
|
+
3. 上传到飞书素材库
|
|
52
|
+
4. 发送语音消息
|
|
53
|
+
"""
|
|
54
|
+
|
|
55
|
+
FEISHU_API_BASE = "https://open.feishu.cn/open-apis"
|
|
56
|
+
|
|
57
|
+
def __init__(
|
|
58
|
+
self,
|
|
59
|
+
app_id: Optional[str] = None,
|
|
60
|
+
app_secret: Optional[str] = None,
|
|
61
|
+
tenant_key: Optional[str] = None,
|
|
62
|
+
audio_dir: Optional[Path] = None
|
|
63
|
+
):
|
|
64
|
+
"""
|
|
65
|
+
初始化飞书语音发送器
|
|
66
|
+
|
|
67
|
+
Args:
|
|
68
|
+
app_id: 飞书应用 ID(可从环境变量 FEISHU_APP_ID 读取)
|
|
69
|
+
app_secret: 飞书应用密钥(可从环境变量 FEISHU_APP_SECRET 读取)
|
|
70
|
+
tenant_key: 租户密钥(自建应用无需)
|
|
71
|
+
audio_dir: 音频缓存目录
|
|
72
|
+
"""
|
|
73
|
+
self.app_id = app_id or os.getenv("FEISHU_APP_ID")
|
|
74
|
+
self.app_secret = app_secret or os.getenv("FEISHU_APP_SECRET")
|
|
75
|
+
self.tenant_key = tenant_key
|
|
76
|
+
|
|
77
|
+
if not self.app_id or not self.app_secret:
|
|
78
|
+
raise ValueError(
|
|
79
|
+
"Missing Feishu credentials. Set FEISHU_APP_ID and FEISHU_APP_SECRET "
|
|
80
|
+
"environment variables or pass them to constructor."
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
self.audio_dir = audio_dir or Path(
|
|
84
|
+
os.getenv("OPENCLAW_STATE_DIR", "~/.openclaw/state/eng-lang-tutor")
|
|
85
|
+
).expanduser() / "audio"
|
|
86
|
+
self.audio_dir.mkdir(parents=True, exist_ok=True)
|
|
87
|
+
|
|
88
|
+
self.converter = AudioConverter()
|
|
89
|
+
self._access_token: Optional[str] = None
|
|
90
|
+
self._token_expires: float = 0
|
|
91
|
+
|
|
92
|
+
async def _get_access_token(self) -> str:
|
|
93
|
+
"""获取飞书访问令牌"""
|
|
94
|
+
import time
|
|
95
|
+
|
|
96
|
+
# 检查缓存
|
|
97
|
+
if self._access_token and time.time() < self._token_expires:
|
|
98
|
+
return self._access_token
|
|
99
|
+
|
|
100
|
+
url = f"{self.FEISHU_API_BASE}/auth/v3/tenant_access_token/internal"
|
|
101
|
+
headers = {"Content-Type": "application/json"}
|
|
102
|
+
data = {
|
|
103
|
+
"app_id": self.app_id,
|
|
104
|
+
"app_secret": self.app_secret
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
async with aiohttp.ClientSession() as session:
|
|
108
|
+
async with session.post(url, headers=headers, json=data) as resp:
|
|
109
|
+
result = await resp.json()
|
|
110
|
+
|
|
111
|
+
if result.get("code") != 0:
|
|
112
|
+
raise RuntimeError(f"Failed to get access token: {result}")
|
|
113
|
+
|
|
114
|
+
self._access_token = result["tenant_access_token"]
|
|
115
|
+
self._token_expires = time.time() + result.get("expire", 7200) - 300
|
|
116
|
+
|
|
117
|
+
return self._access_token
|
|
118
|
+
|
|
119
|
+
async def _upload_file(self, file_path: Path, file_type: str = "opus") -> str:
|
|
120
|
+
"""
|
|
121
|
+
上传文件到飞书素材库
|
|
122
|
+
|
|
123
|
+
Args:
|
|
124
|
+
file_path: 文件路径
|
|
125
|
+
file_type: 文件类型
|
|
126
|
+
|
|
127
|
+
Returns:
|
|
128
|
+
file_key
|
|
129
|
+
"""
|
|
130
|
+
token = await self._get_access_token()
|
|
131
|
+
|
|
132
|
+
url = f"{self.FEISHU_API_BASE}/im/v1/files"
|
|
133
|
+
headers = {"Authorization": f"Bearer {token}"}
|
|
134
|
+
|
|
135
|
+
async with aiohttp.ClientSession() as session:
|
|
136
|
+
with open(file_path, "rb") as f:
|
|
137
|
+
form = aiohttp.FormData()
|
|
138
|
+
form.add_field("file_type", file_type)
|
|
139
|
+
form.add_field("file_name", file_path.name)
|
|
140
|
+
form.add_field("file", f, filename=file_path.name)
|
|
141
|
+
|
|
142
|
+
async with session.post(url, headers=headers, data=form) as resp:
|
|
143
|
+
result = await resp.json()
|
|
144
|
+
|
|
145
|
+
if result.get("code") != 0:
|
|
146
|
+
raise RuntimeError(f"Failed to upload file: {result}")
|
|
147
|
+
|
|
148
|
+
return result["data"]["file_key"]
|
|
149
|
+
|
|
150
|
+
async def _send_file_message(
|
|
151
|
+
self,
|
|
152
|
+
receive_id: str,
|
|
153
|
+
file_key: str,
|
|
154
|
+
receive_id_type: str = "open_id"
|
|
155
|
+
) -> str:
|
|
156
|
+
"""
|
|
157
|
+
发送文件消息
|
|
158
|
+
|
|
159
|
+
Args:
|
|
160
|
+
receive_id: 接收者 ID
|
|
161
|
+
file_key: 文件 key
|
|
162
|
+
receive_id_type: 接收者 ID 类型
|
|
163
|
+
|
|
164
|
+
Returns:
|
|
165
|
+
message_id
|
|
166
|
+
"""
|
|
167
|
+
token = await self._get_access_token()
|
|
168
|
+
|
|
169
|
+
url = f"{self.FEISHU_API_BASE}/im/v1/messages"
|
|
170
|
+
headers = {
|
|
171
|
+
"Authorization": f"Bearer {token}",
|
|
172
|
+
"Content-Type": "application/json"
|
|
173
|
+
}
|
|
174
|
+
params = {
|
|
175
|
+
"receive_id_type": receive_id_type
|
|
176
|
+
}
|
|
177
|
+
data = {
|
|
178
|
+
"receive_id": receive_id,
|
|
179
|
+
"msg_type": "file",
|
|
180
|
+
"content": f'{{"file_key": "{file_key}"}}'
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
async with aiohttp.ClientSession() as session:
|
|
184
|
+
async with session.post(url, headers=headers, params=params, json=data) as resp:
|
|
185
|
+
result = await resp.json()
|
|
186
|
+
|
|
187
|
+
if result.get("code") != 0:
|
|
188
|
+
raise RuntimeError(f"Failed to send message: {result}")
|
|
189
|
+
|
|
190
|
+
return result["data"]["message_id"]
|
|
191
|
+
|
|
192
|
+
async def send_voice(
|
|
193
|
+
self,
|
|
194
|
+
receive_id: str,
|
|
195
|
+
audio_path: Path,
|
|
196
|
+
receive_id_type: str = "open_id",
|
|
197
|
+
auto_convert: bool = True,
|
|
198
|
+
delete_after_send: bool = False
|
|
199
|
+
) -> VoiceSendResult:
|
|
200
|
+
"""
|
|
201
|
+
发送语音消息
|
|
202
|
+
|
|
203
|
+
Args:
|
|
204
|
+
receive_id: 接收者 ID(open_id / user_id / union_id)
|
|
205
|
+
audio_path: 音频文件路径(MP3 或 Opus)
|
|
206
|
+
receive_id_type: 接收者 ID 类型
|
|
207
|
+
auto_convert: 是否自动转换为 Opus 格式
|
|
208
|
+
delete_after_send: 发送后是否删除临时文件
|
|
209
|
+
|
|
210
|
+
Returns:
|
|
211
|
+
VoiceSendResult
|
|
212
|
+
"""
|
|
213
|
+
audio_path = Path(audio_path)
|
|
214
|
+
temp_file = None
|
|
215
|
+
|
|
216
|
+
try:
|
|
217
|
+
# 如果是 MP3,转换为 Opus
|
|
218
|
+
if auto_convert and audio_path.suffix.lower() == ".mp3":
|
|
219
|
+
opus_path = audio_path.with_suffix(".opus")
|
|
220
|
+
result = self.converter.convert_to_voice(
|
|
221
|
+
input_path=audio_path,
|
|
222
|
+
output_path=opus_path,
|
|
223
|
+
format="opus",
|
|
224
|
+
sample_rate=16000
|
|
225
|
+
)
|
|
226
|
+
if not result.success:
|
|
227
|
+
return VoiceSendResult(
|
|
228
|
+
success=False,
|
|
229
|
+
error_message=f"Audio conversion failed: {result.error_message}"
|
|
230
|
+
)
|
|
231
|
+
audio_path = opus_path
|
|
232
|
+
temp_file = opus_path if delete_after_send else None
|
|
233
|
+
|
|
234
|
+
# 上传文件
|
|
235
|
+
file_key = await self._upload_file(audio_path)
|
|
236
|
+
|
|
237
|
+
# 发送消息
|
|
238
|
+
message_id = await self._send_file_message(
|
|
239
|
+
receive_id=receive_id,
|
|
240
|
+
file_key=file_key,
|
|
241
|
+
receive_id_type=receive_id_type
|
|
242
|
+
)
|
|
243
|
+
|
|
244
|
+
return VoiceSendResult(
|
|
245
|
+
success=True,
|
|
246
|
+
message_id=message_id
|
|
247
|
+
)
|
|
248
|
+
|
|
249
|
+
except Exception as e:
|
|
250
|
+
return VoiceSendResult(
|
|
251
|
+
success=False,
|
|
252
|
+
error_message=str(e)
|
|
253
|
+
)
|
|
254
|
+
finally:
|
|
255
|
+
# 清理临时文件
|
|
256
|
+
if temp_file and temp_file.exists():
|
|
257
|
+
temp_file.unlink()
|
|
258
|
+
|
|
259
|
+
async def send_voice_from_text(
|
|
260
|
+
self,
|
|
261
|
+
receive_id: str,
|
|
262
|
+
text: str,
|
|
263
|
+
voice: str = "catherine",
|
|
264
|
+
speed: float = 0.9,
|
|
265
|
+
receive_id_type: str = "open_id"
|
|
266
|
+
) -> VoiceSendResult:
|
|
267
|
+
"""
|
|
268
|
+
从文本直接生成并发送语音
|
|
269
|
+
|
|
270
|
+
Args:
|
|
271
|
+
receive_id: 接收者 ID
|
|
272
|
+
text: 要转换的文本
|
|
273
|
+
voice: 音色
|
|
274
|
+
speed: 语速
|
|
275
|
+
receive_id_type: 接收者 ID 类型
|
|
276
|
+
|
|
277
|
+
Returns:
|
|
278
|
+
VoiceSendResult
|
|
279
|
+
"""
|
|
280
|
+
from .tts import TTSManager
|
|
281
|
+
|
|
282
|
+
try:
|
|
283
|
+
# 生成 TTS 音频
|
|
284
|
+
manager = TTSManager.from_env()
|
|
285
|
+
output_path = self.audio_dir / f"temp_{hash(text)}.mp3"
|
|
286
|
+
|
|
287
|
+
result = manager.synthesize(
|
|
288
|
+
text=text,
|
|
289
|
+
output_path=output_path,
|
|
290
|
+
voice=voice,
|
|
291
|
+
speed=speed
|
|
292
|
+
)
|
|
293
|
+
|
|
294
|
+
if not result.success:
|
|
295
|
+
return VoiceSendResult(
|
|
296
|
+
success=False,
|
|
297
|
+
error_message=f"TTS failed: {result.error_message}"
|
|
298
|
+
)
|
|
299
|
+
|
|
300
|
+
# 发送语音
|
|
301
|
+
return await self.send_voice(
|
|
302
|
+
receive_id=receive_id,
|
|
303
|
+
audio_path=output_path,
|
|
304
|
+
receive_id_type=receive_id_type,
|
|
305
|
+
auto_convert=True,
|
|
306
|
+
delete_after_send=True
|
|
307
|
+
)
|
|
308
|
+
|
|
309
|
+
except Exception as e:
|
|
310
|
+
return VoiceSendResult(
|
|
311
|
+
success=False,
|
|
312
|
+
error_message=str(e)
|
|
313
|
+
)
|
|
314
|
+
|
|
315
|
+
async def send_keypoint_voices(
|
|
316
|
+
self,
|
|
317
|
+
receive_id: str,
|
|
318
|
+
keypoint: Dict[str, Any],
|
|
319
|
+
audio_info: Dict[str, Any],
|
|
320
|
+
receive_id_type: str = "open_id",
|
|
321
|
+
include_dialogue: bool = True,
|
|
322
|
+
include_expressions: bool = True
|
|
323
|
+
) -> List[VoiceSendResult]:
|
|
324
|
+
"""
|
|
325
|
+
发送知识点的所有语音
|
|
326
|
+
|
|
327
|
+
Args:
|
|
328
|
+
receive_id: 接收者 ID
|
|
329
|
+
keypoint: 知识点数据
|
|
330
|
+
audio_info: TTS 生成的音频信息
|
|
331
|
+
receive_id_type: 接收者 ID 类型
|
|
332
|
+
include_dialogue: 是否发送对话音频
|
|
333
|
+
include_expressions: 是否发送表达音频
|
|
334
|
+
|
|
335
|
+
Returns:
|
|
336
|
+
发送结果列表
|
|
337
|
+
"""
|
|
338
|
+
results = []
|
|
339
|
+
date_str = audio_info.get("generated_at", "")[:10] # YYYY-MM-DD
|
|
340
|
+
|
|
341
|
+
# 发送对话音频
|
|
342
|
+
if include_dialogue:
|
|
343
|
+
for item in audio_info.get("dialogue", []):
|
|
344
|
+
if "error" in item:
|
|
345
|
+
results.append(VoiceSendResult(
|
|
346
|
+
success=False,
|
|
347
|
+
error_message=f"Dialogue generation failed: {item['error']}"
|
|
348
|
+
))
|
|
349
|
+
continue
|
|
350
|
+
|
|
351
|
+
audio_url = item.get("audio_url", "")
|
|
352
|
+
if not audio_url:
|
|
353
|
+
continue
|
|
354
|
+
|
|
355
|
+
# 解析路径:audio/YYYY-MM-DD/filename.opus
|
|
356
|
+
parts = audio_url.split("/")
|
|
357
|
+
audio_path = self.audio_dir / parts[1] / parts[2]
|
|
358
|
+
|
|
359
|
+
if not audio_path.exists():
|
|
360
|
+
# 尝试 MP3 扩展名
|
|
361
|
+
audio_path = audio_path.with_suffix(".mp3")
|
|
362
|
+
|
|
363
|
+
if audio_path.exists():
|
|
364
|
+
speaker = item.get("speaker", "")
|
|
365
|
+
text = item.get("text", "")
|
|
366
|
+
|
|
367
|
+
result = await self.send_voice(
|
|
368
|
+
receive_id=receive_id,
|
|
369
|
+
audio_path=audio_path,
|
|
370
|
+
receive_id_type=receive_id_type,
|
|
371
|
+
auto_convert=True
|
|
372
|
+
)
|
|
373
|
+
results.append(result)
|
|
374
|
+
|
|
375
|
+
# 发送表达音频
|
|
376
|
+
if include_expressions:
|
|
377
|
+
for item in audio_info.get("expressions", []):
|
|
378
|
+
if "error" in item:
|
|
379
|
+
results.append(VoiceSendResult(
|
|
380
|
+
success=False,
|
|
381
|
+
error_message=f"Expression generation failed: {item['error']}"
|
|
382
|
+
))
|
|
383
|
+
continue
|
|
384
|
+
|
|
385
|
+
audio_url = item.get("audio_url", "")
|
|
386
|
+
if not audio_url:
|
|
387
|
+
continue
|
|
388
|
+
|
|
389
|
+
parts = audio_url.split("/")
|
|
390
|
+
audio_path = self.audio_dir / parts[1] / parts[2]
|
|
391
|
+
|
|
392
|
+
if not audio_path.exists():
|
|
393
|
+
audio_path = audio_path.with_suffix(".mp3")
|
|
394
|
+
|
|
395
|
+
if audio_path.exists():
|
|
396
|
+
result = await self.send_voice(
|
|
397
|
+
receive_id=receive_id,
|
|
398
|
+
audio_path=audio_path,
|
|
399
|
+
receive_id_type=receive_id_type,
|
|
400
|
+
auto_convert=True
|
|
401
|
+
)
|
|
402
|
+
results.append(result)
|
|
403
|
+
|
|
404
|
+
return results
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
# 同步包装器(用于非异步环境)
|
|
408
|
+
class FeishuVoiceSenderSync:
|
|
409
|
+
"""飞书语音发送器的同步包装"""
|
|
410
|
+
|
|
411
|
+
def __init__(self, *args, **kwargs):
|
|
412
|
+
self._async_sender = FeishuVoiceSender(*args, **kwargs)
|
|
413
|
+
|
|
414
|
+
def send_voice(self, *args, **kwargs) -> VoiceSendResult:
|
|
415
|
+
return asyncio.run(self._async_sender.send_voice(*args, **kwargs))
|
|
416
|
+
|
|
417
|
+
def send_voice_from_text(self, *args, **kwargs) -> VoiceSendResult:
|
|
418
|
+
return asyncio.run(self._async_sender.send_voice_from_text(*args, **kwargs))
|
|
419
|
+
|
|
420
|
+
def send_keypoint_voices(self, *args, **kwargs) -> List[VoiceSendResult]:
|
|
421
|
+
return asyncio.run(self._async_sender.send_keypoint_voices(*args, **kwargs))
|
package/scripts/state_manager.py
CHANGED
|
@@ -287,7 +287,8 @@ class StateManager:
|
|
|
287
287
|
return daily_path
|
|
288
288
|
|
|
289
289
|
def save_daily_content(self, content_type: str, content: Dict[str, Any],
|
|
290
|
-
target_date: Optional[date] = None
|
|
290
|
+
target_date: Optional[date] = None,
|
|
291
|
+
generate_audio: bool = True) -> Path:
|
|
291
292
|
"""
|
|
292
293
|
Save content to the daily directory.
|
|
293
294
|
|
|
@@ -295,6 +296,7 @@ class StateManager:
|
|
|
295
296
|
content_type: Type of content ('keypoint', 'quiz', 'user_answers')
|
|
296
297
|
content: Content dictionary to save
|
|
297
298
|
target_date: Date for the content (defaults to today)
|
|
299
|
+
generate_audio: Whether to auto-generate audio for keypoints (default True)
|
|
298
300
|
|
|
299
301
|
Returns:
|
|
300
302
|
Path to the saved file
|
|
@@ -310,8 +312,95 @@ class StateManager:
|
|
|
310
312
|
with open(file_path, 'w', encoding='utf-8') as f:
|
|
311
313
|
json.dump(content, f, ensure_ascii=False, indent=2)
|
|
312
314
|
|
|
315
|
+
# Auto-generate audio for keypoints
|
|
316
|
+
if content_type == 'keypoint' and generate_audio:
|
|
317
|
+
try:
|
|
318
|
+
audio_result = self.generate_keypoint_audio(target_date)
|
|
319
|
+
if audio_result.get('success'):
|
|
320
|
+
# Update keypoint with audio metadata
|
|
321
|
+
content['audio'] = {
|
|
322
|
+
'composed': audio_result.get('audio_path'),
|
|
323
|
+
'duration_seconds': audio_result.get('duration_seconds'),
|
|
324
|
+
'generated_at': datetime.now().isoformat()
|
|
325
|
+
}
|
|
326
|
+
# Re-save with audio info
|
|
327
|
+
with open(file_path, 'w', encoding='utf-8') as f:
|
|
328
|
+
json.dump(content, f, ensure_ascii=False, indent=2)
|
|
329
|
+
except Exception as e:
|
|
330
|
+
print(f"Warning: Audio generation failed: {e}")
|
|
331
|
+
|
|
313
332
|
return file_path
|
|
314
333
|
|
|
334
|
+
def generate_keypoint_audio(self, target_date: Optional[date] = None) -> Dict[str, Any]:
|
|
335
|
+
"""
|
|
336
|
+
Generate composed audio for a keypoint.
|
|
337
|
+
|
|
338
|
+
Args:
|
|
339
|
+
target_date: Date for the keypoint (defaults to today)
|
|
340
|
+
|
|
341
|
+
Returns:
|
|
342
|
+
Dictionary with:
|
|
343
|
+
- success: bool
|
|
344
|
+
- audio_path: str (relative path from data_dir)
|
|
345
|
+
- duration_seconds: float
|
|
346
|
+
- error_message: str (if failed)
|
|
347
|
+
"""
|
|
348
|
+
try:
|
|
349
|
+
from .audio_composer import AudioComposer
|
|
350
|
+
from .tts import TTSManager
|
|
351
|
+
except ImportError:
|
|
352
|
+
from audio_composer import AudioComposer
|
|
353
|
+
from tts import TTSManager
|
|
354
|
+
|
|
355
|
+
if target_date is None:
|
|
356
|
+
target_date = date.today()
|
|
357
|
+
|
|
358
|
+
# Load the keypoint
|
|
359
|
+
keypoint = self.load_daily_content('keypoint', target_date)
|
|
360
|
+
if not keypoint:
|
|
361
|
+
return {
|
|
362
|
+
'success': False,
|
|
363
|
+
'error_message': f'No keypoint found for {target_date}'
|
|
364
|
+
}
|
|
365
|
+
|
|
366
|
+
# Prepare output path
|
|
367
|
+
date_str = target_date.strftime('%Y-%m-%d')
|
|
368
|
+
self.audio_dir.mkdir(parents=True, exist_ok=True)
|
|
369
|
+
output_path = self.audio_dir / date_str / "keypoint_full.mp3"
|
|
370
|
+
output_path.parent.mkdir(parents=True, exist_ok=True)
|
|
371
|
+
|
|
372
|
+
try:
|
|
373
|
+
# Initialize TTS and composer (handle both package and direct imports)
|
|
374
|
+
try:
|
|
375
|
+
from .audio_composer import AudioComposer
|
|
376
|
+
from .tts import TTSManager
|
|
377
|
+
except ImportError:
|
|
378
|
+
from audio_composer import AudioComposer
|
|
379
|
+
from tts import TTSManager
|
|
380
|
+
|
|
381
|
+
tts = TTSManager.from_env()
|
|
382
|
+
composer = AudioComposer(tts)
|
|
383
|
+
|
|
384
|
+
# Compose audio
|
|
385
|
+
result = composer.compose_keypoint_audio(keypoint, output_path)
|
|
386
|
+
|
|
387
|
+
if result.success:
|
|
388
|
+
return {
|
|
389
|
+
'success': True,
|
|
390
|
+
'audio_path': f"audio/{date_str}/keypoint_full.mp3",
|
|
391
|
+
'duration_seconds': result.duration_seconds
|
|
392
|
+
}
|
|
393
|
+
else:
|
|
394
|
+
return {
|
|
395
|
+
'success': False,
|
|
396
|
+
'error_message': result.error_message
|
|
397
|
+
}
|
|
398
|
+
except Exception as e:
|
|
399
|
+
return {
|
|
400
|
+
'success': False,
|
|
401
|
+
'error_message': str(e)
|
|
402
|
+
}
|
|
403
|
+
|
|
315
404
|
def load_daily_content(self, content_type: str,
|
|
316
405
|
target_date: Optional[date] = None) -> Optional[Dict[str, Any]]:
|
|
317
406
|
"""
|
|
Binary file
|
package/scripts/tts/manager.py
CHANGED
|
@@ -27,9 +27,14 @@ from pathlib import Path
|
|
|
27
27
|
from typing import Dict, Any, Optional, Type, ClassVar
|
|
28
28
|
from datetime import date, datetime
|
|
29
29
|
import os
|
|
30
|
+
import sys
|
|
31
|
+
|
|
32
|
+
# 添加 scripts 目录到路径以导入 state_manager
|
|
33
|
+
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
30
34
|
|
|
31
35
|
from .base import TTSProvider, TTSConfig, TTSResult
|
|
32
36
|
from .providers.xunfei import XunFeiProvider
|
|
37
|
+
from state_manager import get_default_state_dir
|
|
33
38
|
|
|
34
39
|
|
|
35
40
|
# Provider 注册表
|
|
@@ -57,7 +62,7 @@ class TTSManager:
|
|
|
57
62
|
def __init__(
|
|
58
63
|
self,
|
|
59
64
|
provider: str = "xunfei",
|
|
60
|
-
data_dir: str =
|
|
65
|
+
data_dir: str = None,
|
|
61
66
|
config: Optional[TTSConfig] = None,
|
|
62
67
|
**credentials
|
|
63
68
|
):
|
|
@@ -66,12 +71,12 @@ class TTSManager:
|
|
|
66
71
|
|
|
67
72
|
Args:
|
|
68
73
|
provider: Provider 名称(目前仅支持 "xunfei")
|
|
69
|
-
data_dir:
|
|
74
|
+
data_dir: 数据目录(默认使用 OPENCLAW_STATE_DIR 或 ~/.openclaw/state/eng-lang-tutor/)
|
|
70
75
|
config: TTS 配置
|
|
71
76
|
**credentials: Provider 认证信息
|
|
72
77
|
|
|
73
78
|
示例:
|
|
74
|
-
#
|
|
79
|
+
# 讯飞(使用默认数据目录)
|
|
75
80
|
manager = TTSManager(provider="xunfei")
|
|
76
81
|
|
|
77
82
|
# 讯飞(直接传入密钥)
|
|
@@ -88,7 +93,12 @@ class TTSManager:
|
|
|
88
93
|
f"Available: {list(PROVIDERS.keys())}"
|
|
89
94
|
)
|
|
90
95
|
|
|
91
|
-
|
|
96
|
+
# 使用与 StateManager 相同的默认目录逻辑
|
|
97
|
+
if data_dir is None:
|
|
98
|
+
self.data_dir = get_default_state_dir()
|
|
99
|
+
else:
|
|
100
|
+
self.data_dir = Path(data_dir)
|
|
101
|
+
|
|
92
102
|
self.audio_dir = self.data_dir / "audio"
|
|
93
103
|
self.audio_dir.mkdir(parents=True, exist_ok=True)
|
|
94
104
|
|
|
Binary file
|
|
@@ -16,6 +16,8 @@ import base64
|
|
|
16
16
|
import hmac
|
|
17
17
|
import json
|
|
18
18
|
import os
|
|
19
|
+
import ssl
|
|
20
|
+
import certifi
|
|
19
21
|
from pathlib import Path
|
|
20
22
|
from typing import Optional, ClassVar, Dict
|
|
21
23
|
from urllib.parse import urlencode
|
|
@@ -132,8 +134,12 @@ class XunFeiProvider(TTSProvider):
|
|
|
132
134
|
data = json.loads(message)
|
|
133
135
|
if data.get("code") == 0:
|
|
134
136
|
audio = data.get("data", {}).get("audio", "")
|
|
137
|
+
status = data.get("data", {}).get("status", 0)
|
|
135
138
|
if audio:
|
|
136
139
|
audio_data.extend(base64.b64decode(audio))
|
|
140
|
+
# status=2 表示合成完成,关闭连接
|
|
141
|
+
if status == 2:
|
|
142
|
+
ws.close()
|
|
137
143
|
else:
|
|
138
144
|
error_msg = f"XunFei API error: code={data.get('code')}, message={data.get('message')}"
|
|
139
145
|
except json.JSONDecodeError as e:
|
|
@@ -170,7 +176,10 @@ class XunFeiProvider(TTSProvider):
|
|
|
170
176
|
on_error=on_error,
|
|
171
177
|
)
|
|
172
178
|
ws.on_open = on_open
|
|
173
|
-
|
|
179
|
+
# 使用 certifi 提供的 SSL 证书
|
|
180
|
+
ws.run_forever(
|
|
181
|
+
sslopt={"cert_reqs": ssl.CERT_REQUIRED, "ca_certs": certifi.where()}
|
|
182
|
+
)
|
|
174
183
|
|
|
175
184
|
if error_msg:
|
|
176
185
|
return TTSResult(success=False, error_message=error_msg)
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|