@rookiestar/eng-lang-tutor 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +22 -0
- package/.gitignore +32 -0
- package/CHANGELOG.md +37 -0
- package/CLAUDE.md +275 -0
- package/README.md +369 -0
- package/SKILL.md +613 -0
- package/bin/eng-lang-tutor.js +177 -0
- package/docs/OPENCLAW_DEPLOYMENT.md +241 -0
- package/examples/sample_keypoint_a1.json +112 -0
- package/examples/sample_keypoint_a2.json +124 -0
- package/examples/sample_keypoint_b1.json +135 -0
- package/examples/sample_keypoint_b2.json +137 -0
- package/examples/sample_keypoint_c1.json +134 -0
- package/examples/sample_keypoint_c2.json +141 -0
- package/examples/sample_quiz_a1.json +94 -0
- package/examples/sample_quiz_a2.json +94 -0
- package/examples/sample_quiz_b1.json +92 -0
- package/examples/sample_quiz_b2.json +94 -0
- package/examples/sample_quiz_c1.json +94 -0
- package/examples/sample_quiz_c2.json +104 -0
- package/package.json +41 -0
- package/references/resources.md +292 -0
- package/requirements.txt +16 -0
- package/scripts/__init__.py +28 -0
- package/scripts/audio/__init__.py +23 -0
- package/scripts/audio/composer.py +367 -0
- package/scripts/audio/converter.py +331 -0
- package/scripts/audio/feishu_voice.py +404 -0
- package/scripts/audio/tts/__init__.py +30 -0
- package/scripts/audio/tts/base.py +166 -0
- package/scripts/audio/tts/manager.py +306 -0
- package/scripts/audio/tts/providers/__init__.py +12 -0
- package/scripts/audio/tts/providers/edge.py +111 -0
- package/scripts/audio/tts/providers/xunfei.py +205 -0
- package/scripts/audio/utils.py +63 -0
- package/scripts/cli/__init__.py +7 -0
- package/scripts/cli/cli.py +229 -0
- package/scripts/cli/command_parser.py +336 -0
- package/scripts/core/__init__.py +30 -0
- package/scripts/core/constants.py +125 -0
- package/scripts/core/error_notebook.py +308 -0
- package/scripts/core/gamification.py +405 -0
- package/scripts/core/scorer.py +295 -0
- package/scripts/core/state_manager.py +814 -0
- package/scripts/eng-lang-tutor +16 -0
- package/scripts/scheduling/__init__.py +6 -0
- package/scripts/scheduling/cron_push.py +229 -0
- package/scripts/utils/__init__.py +12 -0
- package/scripts/utils/dedup.py +331 -0
- package/scripts/utils/helpers.py +82 -0
- package/templates/keypoint_schema.json +420 -0
- package/templates/prompt_templates.md +73 -0
- package/templates/prompts/display_guide.md +106 -0
- package/templates/prompts/initialization.md +350 -0
- package/templates/prompts/keypoint_generation.md +272 -0
- package/templates/prompts/output_rules.md +106 -0
- package/templates/prompts/quiz_generation.md +190 -0
- package/templates/prompts/responses.md +339 -0
- package/templates/prompts/shared_enums.md +252 -0
- package/templates/quiz_schema.json +214 -0
- package/templates/state_schema.json +277 -0
|
@@ -0,0 +1,404 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
飞书语音消息发送模块
|
|
4
|
+
|
|
5
|
+
将 TTS 生成的音频转换为飞书语音格式并发送。
|
|
6
|
+
|
|
7
|
+
使用示例:
|
|
8
|
+
from scripts.audio.feishu_voice import FeishuVoiceSender
|
|
9
|
+
|
|
10
|
+
sender = FeishuVoiceSender(app_id="xxx", app_secret="xxx")
|
|
11
|
+
|
|
12
|
+
# 发送单条语音
|
|
13
|
+
await sender.send_voice_from_text(
|
|
14
|
+
receive_id="ou_xxx",
|
|
15
|
+
text="Hello, nice to meet you!"
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
# 发送知识点音频
|
|
19
|
+
await sender.send_keypoint_voices(
|
|
20
|
+
receive_id="ou_xxx",
|
|
21
|
+
keypoint=keypoint,
|
|
22
|
+
audio_info=audio_info
|
|
23
|
+
)
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
import os
|
|
27
|
+
import asyncio
|
|
28
|
+
import aiohttp
|
|
29
|
+
from pathlib import Path
|
|
30
|
+
from typing import Optional, Dict, Any, List
|
|
31
|
+
from dataclasses import dataclass
|
|
32
|
+
|
|
33
|
+
from .converter import AudioConverter, ConversionResult
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclass
class VoiceSendResult:
    """Outcome of a single attempt to send a voice message."""

    # Whether the send attempt succeeded.
    success: bool
    # Message ID of the sent message; left as None when nothing was sent.
    message_id: Optional[str] = None
    # Human-readable failure description; left as None on success.
    error_message: Optional[str] = None
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class FeishuVoiceSender:
|
|
45
|
+
"""
|
|
46
|
+
飞书语音消息发送器
|
|
47
|
+
|
|
48
|
+
工作流程:
|
|
49
|
+
1. TTS 生成 MP3 音频
|
|
50
|
+
2. 转换为 Opus 格式(飞书推荐)
|
|
51
|
+
3. 上传到飞书素材库
|
|
52
|
+
4. 发送语音消息
|
|
53
|
+
"""
|
|
54
|
+
|
|
55
|
+
FEISHU_API_BASE = "https://open.feishu.cn/open-apis"
|
|
56
|
+
|
|
57
|
+
def __init__(
    self,
    app_id: Optional[str] = None,
    app_secret: Optional[str] = None,
    tenant_key: Optional[str] = None,
    audio_dir: Optional[Path] = None
):
    """
    Set up a Feishu voice sender.

    Args:
        app_id: Feishu app ID; read from the FEISHU_APP_ID environment
            variable when not given.
        app_secret: Feishu app secret; read from the FEISHU_APP_SECRET
            environment variable when not given.
        tenant_key: Tenant key (stored as given; not needed for
            self-built apps).
        audio_dir: Audio cache directory. Defaults to the "audio" folder
            under OPENCLAW_STATE_DIR, or under
            ~/.openclaw/state/eng-lang-tutor when that variable is unset.

    Raises:
        ValueError: If the app ID or app secret is still missing after
            consulting the environment.
    """
    resolved_id = app_id if app_id else os.getenv("FEISHU_APP_ID")
    resolved_secret = app_secret if app_secret else os.getenv("FEISHU_APP_SECRET")

    self.app_id = resolved_id
    self.app_secret = resolved_secret
    self.tenant_key = tenant_key

    if not (resolved_id and resolved_secret):
        raise ValueError(
            "Missing Feishu credentials. Set FEISHU_APP_ID and FEISHU_APP_SECRET "
            "environment variables or pass them to constructor."
        )

    if audio_dir:
        self.audio_dir = audio_dir
    else:
        state_root = os.getenv("OPENCLAW_STATE_DIR", "~/.openclaw/state/eng-lang-tutor")
        self.audio_dir = Path(state_root).expanduser() / "audio"
    # Make sure the cache directory exists before anything writes to it.
    self.audio_dir.mkdir(parents=True, exist_ok=True)

    self.converter = AudioConverter()
    # Token cache, filled lazily by _get_access_token().
    self._access_token: Optional[str] = None
    self._token_expires: float = 0
|
|
91
|
+
|
|
92
|
+
async def _get_access_token(self) -> str:
|
|
93
|
+
"""获取飞书访问令牌"""
|
|
94
|
+
import time
|
|
95
|
+
|
|
96
|
+
# 检查缓存
|
|
97
|
+
if self._access_token and time.time() < self._token_expires:
|
|
98
|
+
return self._access_token
|
|
99
|
+
|
|
100
|
+
url = f"{self.FEISHU_API_BASE}/auth/v3/tenant_access_token/internal"
|
|
101
|
+
headers = {"Content-Type": "application/json"}
|
|
102
|
+
data = {
|
|
103
|
+
"app_id": self.app_id,
|
|
104
|
+
"app_secret": self.app_secret
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
async with aiohttp.ClientSession() as session:
|
|
108
|
+
async with session.post(url, headers=headers, json=data) as resp:
|
|
109
|
+
result = await resp.json()
|
|
110
|
+
|
|
111
|
+
if result.get("code") != 0:
|
|
112
|
+
raise RuntimeError(f"Failed to get access token: {result}")
|
|
113
|
+
|
|
114
|
+
self._access_token = result["tenant_access_token"]
|
|
115
|
+
self._token_expires = time.time() + result.get("expire", 7200) - 300
|
|
116
|
+
|
|
117
|
+
return self._access_token
|
|
118
|
+
|
|
119
|
+
async def _upload_file(self, file_path: Path, file_type: str = "opus") -> str:
    """
    Upload a local file to Feishu and return the resulting file key.

    Args:
        file_path: Path of the file to upload.
        file_type: Value sent as the "file_type" form field.

    Returns:
        The "file_key" taken from the response's "data" object.

    Raises:
        RuntimeError: If the upload endpoint answers with a non-zero "code".
    """
    token = await self._get_access_token()

    endpoint = f"{self.FEISHU_API_BASE}/im/v1/files"
    auth_headers = {"Authorization": f"Bearer {token}"}
    upload_name = file_path.name

    async with aiohttp.ClientSession() as session:
        # The file handle must stay open until the request has been sent.
        with open(file_path, "rb") as handle:
            payload = aiohttp.FormData()
            payload.add_field("file_type", file_type)
            payload.add_field("file_name", upload_name)
            payload.add_field("file", handle, filename=upload_name)

            async with session.post(endpoint, headers=auth_headers, data=payload) as resp:
                body = await resp.json()

    if body.get("code") != 0:
        raise RuntimeError(f"Failed to upload file: {body}")

    return body["data"]["file_key"]
|
|
149
|
+
|
|
150
|
+
async def _send_file_message(
    self,
    receive_id: str,
    file_key: str,
    receive_id_type: str = "open_id"
) -> str:
    """
    Send a previously uploaded file as a message.

    Args:
        receive_id: ID of the recipient.
        file_key: Key of the uploaded file, as returned by _upload_file().
        receive_id_type: Kind of ID that receive_id holds; sent as the
            "receive_id_type" query parameter.

    Returns:
        The "message_id" taken from the response's "data" object.

    Raises:
        RuntimeError: If the message endpoint answers with a non-zero "code".
    """
    import json

    token = await self._get_access_token()

    url = f"{self.FEISHU_API_BASE}/im/v1/messages"
    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json"
    }
    params = {
        "receive_id_type": receive_id_type
    }
    data = {
        "receive_id": receive_id,
        # NOTE(review): the message type stays "file" as before; confirm
        # whether "audio" is intended for voice playback.
        "msg_type": "file",
        # "content" is a JSON document encoded as a string. Serialize it
        # with json.dumps so quotes or backslashes in file_key are escaped
        # instead of producing invalid JSON.
        "content": json.dumps({"file_key": file_key})
    }

    async with aiohttp.ClientSession() as session:
        async with session.post(url, headers=headers, params=params, json=data) as resp:
            result = await resp.json()

    if result.get("code") != 0:
        raise RuntimeError(f"Failed to send message: {result}")

    return result["data"]["message_id"]
|
|
191
|
+
|
|
192
|
+
async def send_voice(
    self,
    receive_id: str,
    audio_path: Path,
    receive_id_type: str = "open_id",
    auto_convert: bool = True,
    delete_after_send: bool = False
) -> VoiceSendResult:
    """
    Upload an audio file and send it to a recipient.

    Args:
        receive_id: Recipient ID (open_id / user_id / union_id).
        audio_path: Path of the audio file (MP3 or Opus).
        receive_id_type: Kind of ID that receive_id holds.
        auto_convert: When True, a ".mp3" input is converted to Opus
            next to the original file before uploading.
        delete_after_send: When True, the Opus file produced by the
            conversion is removed afterwards. The input file itself is
            never removed here.

    Returns:
        VoiceSendResult describing success or failure. Exceptions raised
        during conversion, upload or sending are reported through the
        result's error_message rather than propagated.
    """
    source = Path(audio_path)
    cleanup_target = None

    try:
        needs_conversion = auto_convert and source.suffix.lower() == ".mp3"
        if needs_conversion:
            converted = source.with_suffix(".opus")
            outcome = self.converter.convert_to_voice(
                input_path=source,
                output_path=converted,
                format="opus",
                sample_rate=16000
            )
            if not outcome.success:
                return VoiceSendResult(
                    success=False,
                    error_message=f"Audio conversion failed: {outcome.error_message}"
                )
            source = converted
            # Only the file this call created is ever scheduled for removal.
            if delete_after_send:
                cleanup_target = converted

        # Upload first, then reference the uploaded file in a message.
        uploaded_key = await self._upload_file(source)
        sent_id = await self._send_file_message(
            receive_id=receive_id,
            file_key=uploaded_key,
            receive_id_type=receive_id_type
        )

        return VoiceSendResult(
            success=True,
            message_id=sent_id
        )

    except Exception as exc:
        return VoiceSendResult(
            success=False,
            error_message=str(exc)
        )
    finally:
        # Remove the temporary Opus file, if one was scheduled for cleanup.
        if cleanup_target and cleanup_target.exists():
            cleanup_target.unlink()
|
|
258
|
+
|
|
259
|
+
async def send_voice_from_text(
    self,
    receive_id: str,
    text: str,
    voice: str = "catherine",
    speed: float = 0.9,
    receive_id_type: str = "open_id"
) -> VoiceSendResult:
    """
    Synthesize speech from text and send it as a voice message.

    The text is synthesized to a temporary MP3 in the audio cache
    directory and handed to send_voice() for conversion and delivery.
    Both the temporary MP3 and the converted Opus file are removed
    afterwards.

    Args:
        receive_id: ID of the recipient.
        text: Text to synthesize.
        voice: Voice name passed to the TTS manager.
        speed: Speech speed passed to the TTS manager.
        receive_id_type: Kind of ID that receive_id holds.

    Returns:
        VoiceSendResult describing success or failure. Exceptions raised
        during synthesis or sending are reported through the result's
        error_message rather than propagated.
    """
    import uuid

    from .tts import TTSManager

    # A random name keeps concurrent calls (even with identical text) from
    # sharing one temp file, so one call cannot delete another's audio.
    output_path = self.audio_dir / f"temp_{uuid.uuid4().hex}.mp3"

    try:
        # Generate the TTS audio.
        manager = TTSManager.from_env()
        result = manager.synthesize(
            text=text,
            output_path=output_path,
            voice=voice,
            speed=speed
        )

        if not result.success:
            return VoiceSendResult(
                success=False,
                error_message=f"TTS failed: {result.error_message}"
            )

        # Send the voice message; send_voice() removes the converted Opus.
        return await self.send_voice(
            receive_id=receive_id,
            audio_path=output_path,
            receive_id_type=receive_id_type,
            auto_convert=True,
            delete_after_send=True
        )

    except Exception as e:
        return VoiceSendResult(
            success=False,
            error_message=str(e)
        )
    finally:
        # send_voice() only deletes the Opus it created, so the
        # intermediate MP3 has to be removed here or it accumulates in
        # the cache directory. Cleanup is best-effort and must not mask
        # the result being returned.
        try:
            output_path.unlink()
        except OSError:
            pass
|
|
314
|
+
|
|
315
|
+
async def send_keypoint_voices(
    self,
    receive_id: str,
    keypoint: Dict[str, Any],
    audio_info: Dict[str, Any],
    receive_id_type: str = "open_id",
    include_dialogue: bool = True,
    include_expressions: bool = True
) -> List[VoiceSendResult]:
    """
    Send every audio clip of a keypoint to a recipient.

    Args:
        receive_id: ID of the recipient.
        keypoint: Keypoint data (accepted for interface compatibility;
            not read by this method).
        audio_info: Audio metadata produced by TTS generation. The
            "dialogue" and "expressions" lists are read; each item may
            carry "error" or "audio_url".
        receive_id_type: Kind of ID that receive_id holds.
        include_dialogue: Whether to send the dialogue clips.
        include_expressions: Whether to send the expression clips.

    Returns:
        One VoiceSendResult per item that was sent or that failed. Items
        without an "audio_url", or whose file is not found on disk, are
        skipped and add nothing to the list.
    """
    results: List[VoiceSendResult] = []

    async def _send_items(items, label):
        """Send each item of one audio_info section, collecting results."""
        for item in items:
            if "error" in item:
                results.append(VoiceSendResult(
                    success=False,
                    error_message=f"{label} generation failed: {item['error']}"
                ))
                continue

            audio_url = item.get("audio_url", "")
            if not audio_url:
                continue

            # Expected layout: audio/YYYY-MM-DD/filename.opus
            parts = audio_url.split("/")
            if len(parts) < 3:
                # Report the bad entry instead of letting an IndexError
                # abort the remaining items.
                results.append(VoiceSendResult(
                    success=False,
                    error_message=f"Unexpected audio_url format: {audio_url}"
                ))
                continue

            audio_path = self.audio_dir / parts[1] / parts[2]
            if not audio_path.exists():
                # Fall back to the MP3 file of the same name.
                audio_path = audio_path.with_suffix(".mp3")
            if not audio_path.exists():
                continue

            results.append(await self.send_voice(
                receive_id=receive_id,
                audio_path=audio_path,
                receive_id_type=receive_id_type,
                auto_convert=True
            ))

    # Dialogue clips go first, then expression clips.
    if include_dialogue:
        await _send_items(audio_info.get("dialogue", []), "Dialogue")

    if include_expressions:
        await _send_items(audio_info.get("expressions", []), "Expression")

    return results
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
TTS Module - Text-to-Speech integration for eng-lang-tutor
|
|
4
|
+
|
|
5
|
+
Provides a unified interface for multiple TTS providers:
|
|
6
|
+
- XunFei (讯飞): Free tier with 500 min/day, stable in China
|
|
7
|
+
- Edge-TTS: Completely free, requires VPN in China
|
|
8
|
+
|
|
9
|
+
Usage:
|
|
10
|
+
from scripts.audio.tts import TTSManager
|
|
11
|
+
|
|
12
|
+
# Using XunFei (from environment variables)
|
|
13
|
+
manager = TTSManager.from_env()
|
|
14
|
+
|
|
15
|
+
# Using Edge-TTS (no credentials needed)
|
|
16
|
+
manager = TTSManager(provider="edge-tts")
|
|
17
|
+
|
|
18
|
+
# Generate audio for keypoint
|
|
19
|
+
audio_info = manager.generate_keypoint_audio(keypoint)
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from .manager import TTSManager
|
|
23
|
+
from .base import TTSProvider, TTSConfig, TTSResult
|
|
24
|
+
|
|
25
|
+
__all__ = [
|
|
26
|
+
"TTSManager",
|
|
27
|
+
"TTSProvider",
|
|
28
|
+
"TTSConfig",
|
|
29
|
+
"TTSResult",
|
|
30
|
+
]
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
TTS Provider 抽象基类 - 所有 TTS 服务必须实现此接口
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from abc import ABC, abstractmethod
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from typing import Optional, Dict, Any, ClassVar
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# Supported speech-speed options: speed multiplier -> display label.
SPEED_OPTIONS = {
    0.5: "非常慢 (Very Slow) - 初学者跟读",
    0.7: "慢速 (Slow) - 学习发音",
    0.9: "正常 (Normal) - 日常学习(推荐)",
    1.3: "快速 (Fast) - 听力挑战",
    1.7: "非常快 (Very Fast) - 进阶训练",
}
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
class TTSConfig:
    """Provider-independent TTS settings."""

    # Speech speed (0.5, 0.7, 0.9, 1.3, 1.7).
    speed: float = 0.9
    # Output audio format.
    output_format: str = "mp3"

    # Role-to-voice mapping (optional; an empty string means "use the
    # provider's default").
    narrator_voice: str = ""  # narrator - female voice
    dialogue_a_voice: str = ""  # dialogue speaker A - male voice
    dialogue_b_voice: str = ""  # dialogue speaker B - female voice

    # Kept for compatibility with the older gender-based configuration.
    female_voice: str = ""
    male_voice: str = ""
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass
class TTSResult:
    """Outcome of a TTS synthesis call."""

    # Whether synthesis succeeded.
    success: bool
    # Path of the audio file, when one is reported.
    audio_path: Optional[Path] = None
    # Failure description, when one is reported.
    error_message: Optional[str] = None
    # Audio length in seconds, when one is reported.
    duration_seconds: Optional[float] = None
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class TTSProvider(ABC):
    """
    Abstract base class for TTS service providers.

    Every TTS backend (XunFei, Edge-TTS, ...) must subclass this and
    implement the abstract methods.
    """

    # Class attributes: per-provider defaults.
    PROVIDER_NAME: ClassVar[str] = "base"
    DEFAULT_FEMALE_VOICE: ClassVar[str] = ""
    DEFAULT_MALE_VOICE: ClassVar[str] = ""
    # Default role-to-voice mapping.
    DEFAULT_NARRATOR_VOICE: ClassVar[str] = ""  # narrator - female voice
    DEFAULT_DIALOGUE_A_VOICE: ClassVar[str] = ""  # dialogue A - male voice
    DEFAULT_DIALOGUE_B_VOICE: ClassVar[str] = ""  # dialogue B - female voice
    SUPPORTED_VOICES: ClassVar[Dict[str, str]] = {}  # voice_id -> description

    def __init__(self, config: Optional[TTSConfig] = None, **credentials):
        """
        Store the configuration and credentials, then validate them.

        Args:
            config: TTS configuration; a default TTSConfig is created
                when omitted.
            **credentials: Provider-specific authentication values
                (api_key, appid, ...).
        """
        self.config = config if config else TTSConfig()
        self.credentials = credentials
        self._validate_credentials()

    @abstractmethod
    def _validate_credentials(self) -> None:
        """
        Check that the stored credentials are complete.

        Raises:
            ValueError: If credentials are missing or invalid.
        """
        pass

    @abstractmethod
    def synthesize(
        self,
        text: str,
        output_path: Path,
        voice: Optional[str] = None,
        speed: Optional[float] = None
    ) -> TTSResult:
        """
        Synthesize speech for the given text.

        Args:
            text: Text to synthesize.
            output_path: Destination path of the audio file.
            voice: Voice ID (optional; the configured default is used
                when omitted).
            speed: Speech speed (optional; the configured default is
                used when omitted).

        Returns:
            TTSResult: The synthesis outcome.
        """
        pass

    def get_voice(self, gender: str = "female") -> str:
        """
        Return the voice ID for a gender (legacy interface).

        Args:
            gender: "female" selects the female voice; any other value
                selects the male voice.

        Returns:
            The configured voice ID, or the provider default when the
            configured value is empty.
        """
        if gender != "female":
            return self.config.male_voice or self.DEFAULT_MALE_VOICE
        return self.config.female_voice or self.DEFAULT_FEMALE_VOICE

    def get_voice_by_role(self, role: str) -> str:
        """
        Return the voice ID for a role.

        Lookup order is: configured voice for the role, the provider's
        default for the role, then the provider's gender default.

        Args:
            role: "narrator", "dialogue_a" or "dialogue_b"; any other
                value falls back to get_voice("female").

        Returns:
            The voice ID.
        """
        cfg = self.config
        if role == "narrator":
            return cfg.narrator_voice or self.DEFAULT_NARRATOR_VOICE or self.DEFAULT_FEMALE_VOICE
        if role == "dialogue_a":
            return cfg.dialogue_a_voice or self.DEFAULT_DIALOGUE_A_VOICE or self.DEFAULT_MALE_VOICE
        if role == "dialogue_b":
            return cfg.dialogue_b_voice or self.DEFAULT_DIALOGUE_B_VOICE or self.DEFAULT_FEMALE_VOICE
        return self.get_voice("female")

    @classmethod
    def list_voices(cls) -> Dict[str, str]:
        """
        List all supported voices.

        Returns:
            A copy of the voice ID -> description mapping.
        """
        voices = cls.SUPPORTED_VOICES.copy()
        return voices

    @classmethod
    def get_default_voices(cls) -> Dict[str, str]:
        """
        Return the default role-to-voice mapping.

        Returns:
            A dict mapping each role to its default voice ID, using the
            gender default where no role default is set.
        """
        narrator = cls.DEFAULT_NARRATOR_VOICE or cls.DEFAULT_FEMALE_VOICE
        speaker_a = cls.DEFAULT_DIALOGUE_A_VOICE or cls.DEFAULT_MALE_VOICE
        speaker_b = cls.DEFAULT_DIALOGUE_B_VOICE or cls.DEFAULT_FEMALE_VOICE
        return {
            "narrator": narrator,
            "dialogue_a": speaker_a,
            "dialogue_b": speaker_b,
        }
|