@rookiestar/eng-lang-tutor 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +22 -0
- package/.gitignore +32 -0
- package/CHANGELOG.md +37 -0
- package/CLAUDE.md +275 -0
- package/README.md +369 -0
- package/SKILL.md +613 -0
- package/bin/eng-lang-tutor.js +177 -0
- package/docs/OPENCLAW_DEPLOYMENT.md +241 -0
- package/examples/sample_keypoint_a1.json +112 -0
- package/examples/sample_keypoint_a2.json +124 -0
- package/examples/sample_keypoint_b1.json +135 -0
- package/examples/sample_keypoint_b2.json +137 -0
- package/examples/sample_keypoint_c1.json +134 -0
- package/examples/sample_keypoint_c2.json +141 -0
- package/examples/sample_quiz_a1.json +94 -0
- package/examples/sample_quiz_a2.json +94 -0
- package/examples/sample_quiz_b1.json +92 -0
- package/examples/sample_quiz_b2.json +94 -0
- package/examples/sample_quiz_c1.json +94 -0
- package/examples/sample_quiz_c2.json +104 -0
- package/package.json +41 -0
- package/references/resources.md +292 -0
- package/requirements.txt +16 -0
- package/scripts/__init__.py +28 -0
- package/scripts/audio/__init__.py +23 -0
- package/scripts/audio/composer.py +367 -0
- package/scripts/audio/converter.py +331 -0
- package/scripts/audio/feishu_voice.py +404 -0
- package/scripts/audio/tts/__init__.py +30 -0
- package/scripts/audio/tts/base.py +166 -0
- package/scripts/audio/tts/manager.py +306 -0
- package/scripts/audio/tts/providers/__init__.py +12 -0
- package/scripts/audio/tts/providers/edge.py +111 -0
- package/scripts/audio/tts/providers/xunfei.py +205 -0
- package/scripts/audio/utils.py +63 -0
- package/scripts/cli/__init__.py +7 -0
- package/scripts/cli/cli.py +229 -0
- package/scripts/cli/command_parser.py +336 -0
- package/scripts/core/__init__.py +30 -0
- package/scripts/core/constants.py +125 -0
- package/scripts/core/error_notebook.py +308 -0
- package/scripts/core/gamification.py +405 -0
- package/scripts/core/scorer.py +295 -0
- package/scripts/core/state_manager.py +814 -0
- package/scripts/eng-lang-tutor +16 -0
- package/scripts/scheduling/__init__.py +6 -0
- package/scripts/scheduling/cron_push.py +229 -0
- package/scripts/utils/__init__.py +12 -0
- package/scripts/utils/dedup.py +331 -0
- package/scripts/utils/helpers.py +82 -0
- package/templates/keypoint_schema.json +420 -0
- package/templates/prompt_templates.md +73 -0
- package/templates/prompts/display_guide.md +106 -0
- package/templates/prompts/initialization.md +350 -0
- package/templates/prompts/keypoint_generation.md +272 -0
- package/templates/prompts/output_rules.md +106 -0
- package/templates/prompts/quiz_generation.md +190 -0
- package/templates/prompts/responses.md +339 -0
- package/templates/prompts/shared_enums.md +252 -0
- package/templates/quiz_schema.json +214 -0
- package/templates/state_schema.json +277 -0
|
@@ -0,0 +1,306 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
TTS 管理器 - 通用入口,支持多 Provider
|
|
4
|
+
|
|
5
|
+
提供统一的 TTS 接口,支持切换不同的 TTS 服务提供商。
|
|
6
|
+
|
|
7
|
+
使用示例:
|
|
8
|
+
# 方式 1:从环境变量读取配置(默认使用 Edge-TTS)
|
|
9
|
+
manager = TTSManager.from_env()
|
|
10
|
+
|
|
11
|
+
# 方式 2:使用 Edge-TTS(默认,免费无需认证)
|
|
12
|
+
manager = TTSManager(provider="edge-tts")
|
|
13
|
+
|
|
14
|
+
# 方式 3:使用讯飞(需要认证)
|
|
15
|
+
manager = TTSManager(
|
|
16
|
+
provider="xunfei",
|
|
17
|
+
appid="xxx",
|
|
18
|
+
api_key="xxx",
|
|
19
|
+
api_secret="xxx"
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
# 合成单条语音
|
|
23
|
+
result = manager.synthesize("Hello", Path("output.mp3"))
|
|
24
|
+
|
|
25
|
+
# 为知识点生成所有音频
|
|
26
|
+
audio_info = manager.generate_keypoint_audio(keypoint)
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
from pathlib import Path
|
|
30
|
+
from typing import Dict, Any, Optional, Type, ClassVar
|
|
31
|
+
from datetime import date, datetime
|
|
32
|
+
import os
|
|
33
|
+
import sys
|
|
34
|
+
|
|
35
|
+
# 添加 scripts 目录到路径以导入 state_manager
|
|
36
|
+
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
|
|
37
|
+
|
|
38
|
+
from .base import TTSProvider, TTSConfig, TTSResult
|
|
39
|
+
from .providers.xunfei import XunFeiProvider
|
|
40
|
+
from .providers.edge import EdgeTTSProvider
|
|
41
|
+
|
|
42
|
+
try:
|
|
43
|
+
from ...core.state_manager import get_default_state_dir
|
|
44
|
+
except ImportError:
|
|
45
|
+
from scripts.core.state_manager import get_default_state_dir
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# Provider registry: maps the public provider name to its implementing class.
# "edge-tts" is the default, recommended provider; "xunfei" is the alternative.
PROVIDERS: Dict[str, Type[TTSProvider]] = {
    "edge-tts": EdgeTTSProvider,
    "xunfei": XunFeiProvider,
}
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class TTSManager:
    """
    Unified entry point for text-to-speech synthesis.

    Wraps one of the provider classes registered in ``PROVIDERS`` and offers:
    - switching between providers at runtime
    - construction from environment variables
    - batch audio generation for a keypoint (dialogue lines and expressions)
    """

    # Names accepted for the ``provider`` argument.
    SUPPORTED_PROVIDERS: ClassVar[list] = list(PROVIDERS.keys())

    def __init__(
        self,
        provider: str = "edge-tts",
        data_dir: Optional[str] = None,
        config: Optional[TTSConfig] = None,
        **credentials
    ):
        """
        Initialise the manager and its provider.

        Args:
            provider: Key of ``PROVIDERS`` ("edge-tts" by default, or "xunfei").
            data_dir: Data directory. When ``None`` the result of
                ``get_default_state_dir()`` is used (documented upstream as
                OPENCLAW_STATE_DIR or ~/.openclaw/state/eng-lang-tutor/).
            config: TTS configuration; a default ``TTSConfig()`` is created
                when omitted.
            **credentials: Forwarded unchanged to the provider constructor.

        Raises:
            ValueError: If ``provider`` is not a key of ``PROVIDERS``.

        Example:
            # Edge-TTS needs no credentials
            manager = TTSManager()

            # XunFei with explicit credentials
            manager = TTSManager(
                provider="xunfei",
                appid="xxx",
                api_key="xxx",
                api_secret="xxx"
            )
        """
        # Validate before touching the filesystem.
        self._require_known_provider(provider)

        # Same default-directory logic as StateManager.
        if data_dir is None:
            self.data_dir = get_default_state_dir()
        else:
            self.data_dir = Path(data_dir)

        self.audio_dir = self.data_dir / "audio"
        self.audio_dir.mkdir(parents=True, exist_ok=True)

        self.provider_name = provider
        self.config = config or TTSConfig()

        self.provider: TTSProvider = PROVIDERS[provider](
            config=self.config,
            **credentials
        )

    @staticmethod
    def _require_known_provider(provider: str) -> None:
        """Raise ``ValueError`` unless ``provider`` is a key of ``PROVIDERS``."""
        if provider not in PROVIDERS:
            raise ValueError(
                f"Unknown provider: {provider}. "
                f"Available: {list(PROVIDERS.keys())}"
            )

    @staticmethod
    def _audio_entry(fields: Dict[str, Any], result, date_str: str, output_path: Path) -> Dict[str, Any]:
        """Copy ``fields`` and append either ``audio_url`` (success) or ``error``."""
        entry = dict(fields)
        if result.success:
            entry["audio_url"] = f"audio/{date_str}/{output_path.name}"
        else:
            # Failures are recorded instead of raised so one bad line does
            # not abort the whole batch.
            entry["error"] = result.error_message
        return entry

    @classmethod
    def from_env(cls, provider: Optional[str] = None, **kwargs) -> "TTSManager":
        """
        Create a manager whose provider is chosen via the environment.

        The provider name comes from the ``provider`` argument or, when that
        is falsy, from the ``TTS_PROVIDER`` environment variable (falling back
        to "edge-tts"). Example environment for XunFei:
            TTS_PROVIDER=xunfei
            XUNFEI_APPID=xxx
            XUNFEI_API_KEY=xxx
            XUNFEI_API_SECRET=xxx

        Args:
            provider: Optional explicit provider name.
            **kwargs: Forwarded to the constructor.

        Returns:
            A new ``TTSManager`` instance.
        """
        provider = provider or os.getenv("TTS_PROVIDER", "edge-tts")
        return cls(provider=provider, **kwargs)

    def switch_provider(self, provider: str, **credentials) -> None:
        """
        Replace the active provider, reusing the current config.

        Args:
            provider: Key of ``PROVIDERS``.
            **credentials: Credentials for the new provider.

        Raises:
            ValueError: If ``provider`` is not a key of ``PROVIDERS``.
        """
        self._require_known_provider(provider)

        # Build the new provider first: if its constructor raises, the manager
        # keeps its previous provider AND provider_name, so the two never
        # disagree.
        replacement = PROVIDERS[provider](
            config=self.config,
            **credentials
        )
        self.provider_name = provider
        self.provider = replacement

    def synthesize(
        self,
        text: str,
        output_path: Path,
        voice: Optional[str] = None,
        speed: Optional[float] = None
    ) -> TTSResult:
        """
        Synthesize a single utterance with the active provider.

        Args:
            text: Text to synthesize.
            output_path: Destination audio file.
            voice: Optional voice ID.
            speed: Optional speaking speed (documented range 0.5-2.0).

        Returns:
            The provider's ``TTSResult``.
        """
        return self.provider.synthesize(text, output_path, voice, speed)

    def generate_keypoint_audio(
        self,
        keypoint: Dict[str, Any],
        target_date: Optional[date] = None
    ) -> Dict[str, Any]:
        """
        Generate every audio clip for one keypoint.

        Produces:
        - one file per dialogue line; speaker "A" (case-insensitive) gets the
          provider's male voice, every other speaker the female voice
        - one file per expression phrase, at speed 0.7 for easier listening

        Dialogue lines without a ":" separator, lines with empty text, and
        expressions without a phrase are skipped. Synthesis failures are
        recorded in the result (``error`` key) rather than raised.

        Args:
            keypoint: Keypoint data; ``examples[*].dialogue`` and
                ``expressions[*].phrase`` are read.
            target_date: Date used for the output sub-directory (default: today).

        Returns:
            Dict with ``dialogue`` and ``expressions`` lists plus
            ``generated_at`` and ``provider`` metadata.
        """
        target_date = target_date or date.today()
        date_str = target_date.strftime('%Y-%m-%d')

        date_audio_dir = self.audio_dir / date_str
        date_audio_dir.mkdir(parents=True, exist_ok=True)

        audio_info = {
            "dialogue": [],
            "expressions": [],
            "generated_at": datetime.now().isoformat(),
            "provider": self.provider_name
        }

        # 1. Dialogue audio
        for i, example in enumerate(keypoint.get("examples", [])):
            for j, line in enumerate(example.get("dialogue", [])):
                if ":" not in line:
                    continue

                speaker, text = line.split(":", 1)
                speaker = speaker.strip()
                text = text.strip()
                if not text:
                    continue

                # A = male voice, anyone else = female voice
                gender = "male" if speaker.upper() == "A" else "female"
                voice = self.provider.get_voice(gender)
                output_path = date_audio_dir / f"dialogue_{i}_{j}_{speaker}.mp3"

                result = self.synthesize(
                    text=text,
                    output_path=output_path,
                    voice=voice
                )
                audio_info["dialogue"].append(
                    self._audio_entry(
                        {"speaker": speaker, "text": text},
                        result, date_str, output_path
                    )
                )

        # 2. Expression audio (slower speed)
        for i, expr in enumerate(keypoint.get("expressions", [])):
            phrase = expr.get("phrase", "")
            if not phrase:
                continue

            output_path = date_audio_dir / f"expression_{i+1}.mp3"
            result = self.synthesize(
                text=phrase,
                output_path=output_path,
                speed=0.7  # slower, better for learners
            )
            audio_info["expressions"].append(
                self._audio_entry({"text": phrase}, result, date_str, output_path)
            )

        return audio_info

    @classmethod
    def list_supported_providers(cls) -> list:
        """
        List the names of all registered providers.

        Returns:
            The keys of ``PROVIDERS`` as a new list.
        """
        return list(PROVIDERS.keys())

    def list_voices(self) -> Dict[str, str]:
        """
        List the voices of the active provider.

        Returns:
            Whatever the provider's ``list_voices()`` returns
            (voice ID -> description).
        """
        return self.provider.list_voices()

    def get_audio_path(self, date_str: str, filename: str) -> Path:
        """
        Build the full path of an audio file.

        Args:
            date_str: Date string (YYYY-MM-DD) naming the sub-directory.
            filename: File name inside that sub-directory.

        Returns:
            ``audio_dir / date_str / filename``; existence is not checked.
        """
        return self.audio_dir / date_str / filename
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Edge-TTS Provider 实现
|
|
4
|
+
|
|
5
|
+
Microsoft Edge TTS 服务:
|
|
6
|
+
- 完全免费,无需 API 密钥
|
|
7
|
+
- 高质量 24kHz 神经语音
|
|
8
|
+
- 支持多种美式英语发音人
|
|
9
|
+
- 国内网络可能需要代理
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import asyncio
|
|
13
|
+
import edge_tts
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Optional, ClassVar, Dict
|
|
16
|
+
|
|
17
|
+
from ..base import TTSProvider, TTSConfig, TTSResult
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class EdgeTTSProvider(TTSProvider):
    """
    Microsoft Edge TTS provider.

    American-English voices highlighted by this provider:
    - en-US-JennyNeural: female, friendly (recommended)
    - en-US-AriaNeural: female, confident and clear
    - en-US-EricNeural: male, professional (recommended)
    - en-US-GuyNeural: male, energetic
    - en-US-AnaNeural: female, casual
    - en-US-ChristopherNeural: male, authoritative

    No credentials are required.
    """

    PROVIDER_NAME: ClassVar[str] = "edge-tts"
    DEFAULT_FEMALE_VOICE: ClassVar[str] = "en-US-JennyNeural"  # friendly
    DEFAULT_MALE_VOICE: ClassVar[str] = "en-US-EricNeural"  # professional
    # Role-to-voice mapping: narrator = female, dialogue A = male, dialogue B = female
    DEFAULT_NARRATOR_VOICE: ClassVar[str] = "en-US-JennyNeural"  # narrator - female
    DEFAULT_DIALOGUE_A_VOICE: ClassVar[str] = "en-US-EricNeural"  # dialogue A - male
    DEFAULT_DIALOGUE_B_VOICE: ClassVar[str] = "en-US-JennyNeural"  # dialogue B - female

    SUPPORTED_VOICES: ClassVar[Dict[str, str]] = {
        "en-US-JennyNeural": "美式英语女声,友好亲切(推荐)",
        "en-US-AriaNeural": "美式英语女声,自信清晰",
        "en-US-EricNeural": "美式英语男声,专业理性(推荐)",
        "en-US-GuyNeural": "美式英语男声,热情活力",
        "en-US-AnaNeural": "美式英语女声,可爱随和",
        "en-US-ChristopherNeural": "美式英语男声,权威可靠",
        "en-US-MichelleNeural": "美式英语女声,友好舒适",
        "en-US-RogerNeural": "美式英语男声,生动活泼",
        "en-US-AndrewNeural": "美式英语男声,友好积极",
        "en-US-BrianNeural": "美式英语男声,友好积极",
        "en-US-EmmaNeural": "美式英语女声,友好积极",
        "en-US-AvaNeural": "美式英语女声,友好积极",
    }

    def _validate_credentials(self) -> None:
        """
        Validate credentials.

        Edge-TTS needs none, so this is intentionally a no-op.
        """
        pass

    def synthesize(
        self,
        text: str,
        output_path: Path,
        voice: Optional[str] = None,
        speed: Optional[float] = None
    ) -> TTSResult:
        """
        Synthesize ``text`` into an audio file.

        Args:
            text: Text to synthesize.
            output_path: Destination file (.mp3); parent directories are created.
            voice: Voice ID (optional; defaults to ``get_voice("female")``).
            speed: Speaking speed (optional, 0.5-2.0, 1.0 = normal); a falsy
                value falls back to ``self.config.speed``.

        Returns:
            ``TTSResult`` with ``success=True`` and ``audio_path`` on success,
            or ``success=False`` and ``error_message`` if synthesis raised.
        """
        voice = voice or self.get_voice("female")
        speed_val = speed or self.config.speed

        # Convert speed (0.5-2.0) into the edge-tts rate string:
        #   1.0 -> "+0%", 0.7 -> "-30%" (slower), 1.5 -> "+50%" (faster).
        # round() rather than int(): (0.9 - 1.0) * 100 evaluates to
        # -9.999999999999998, which int() would truncate to -9.
        rate_percent = round((speed_val - 1.0) * 100)
        rate = f"{rate_percent:+d}%"

        # Make sure the output directory exists.
        output_path = Path(output_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)

        async def _synthesize_async() -> None:
            """Run the edge-tts request and save the audio to ``output_path``."""
            communicate = edge_tts.Communicate(text, voice, rate=rate)
            await communicate.save(str(output_path))

        try:
            # Drive the coroutine from synchronous code.
            # NOTE(review): asyncio.run() raises RuntimeError when called from a
            # thread that already has a running event loop; that case is
            # reported as a failed TTSResult below.
            asyncio.run(_synthesize_async())
            return TTSResult(success=True, audio_path=output_path)

        except Exception as e:
            return TTSResult(success=False, error_message=str(e))
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
讯飞 TTS Provider 实现
|
|
4
|
+
|
|
5
|
+
讯飞开放平台语音合成服务:
|
|
6
|
+
- 每日 500 分钟免费额度
|
|
7
|
+
- 国内网络稳定直连
|
|
8
|
+
- 支持美式英语发音人(catherine, henry)
|
|
9
|
+
- WebSocket 流式接口
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import websocket
|
|
13
|
+
import datetime
|
|
14
|
+
import hashlib
|
|
15
|
+
import base64
|
|
16
|
+
import hmac
|
|
17
|
+
import json
|
|
18
|
+
import os
|
|
19
|
+
import ssl
|
|
20
|
+
import certifi
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
from typing import Optional, ClassVar, Dict
|
|
23
|
+
from urllib.parse import urlencode
|
|
24
|
+
|
|
25
|
+
from ..base import TTSProvider, TTSConfig, TTSResult
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class XunFeiProvider(TTSProvider):
    """
    iFLYTEK (XunFei) open-platform TTS provider.

    American-English voices:
    - catherine: female, natural (recommended)
    - henry: male, calm and professional
    - mary: female, news-reader style
    - john: male, energetic

    Environment variables used when credentials are not passed explicitly:
    - XUNFEI_APPID: application ID
    - XUNFEI_API_KEY: API key
    - XUNFEI_API_SECRET: API secret
    """

    PROVIDER_NAME: ClassVar[str] = "xunfei"
    DEFAULT_FEMALE_VOICE: ClassVar[str] = "catherine"  # American English, female
    DEFAULT_MALE_VOICE: ClassVar[str] = "henry"  # American English, male
    # Role-to-voice mapping: narrator = female, dialogue A = male, dialogue B = female
    DEFAULT_NARRATOR_VOICE: ClassVar[str] = "catherine"  # narrator - female
    DEFAULT_DIALOGUE_A_VOICE: ClassVar[str] = "henry"  # dialogue A - male
    DEFAULT_DIALOGUE_B_VOICE: ClassVar[str] = "catherine"  # dialogue B - female

    SUPPORTED_VOICES: ClassVar[Dict[str, str]] = {
        "catherine": "美式英语女声,自然流畅(推荐)",
        "henry": "美式英语男声,沉稳专业",
        "mary": "美式英语女声,新闻播报",
        "john": "美式英语男声,活力阳光",
    }

    def _validate_credentials(self) -> None:
        """
        Ensure ``appid``, ``api_key`` and ``api_secret`` are available.

        Explicitly passed credentials win; a missing or empty one is filled
        from the matching ``XUNFEI_*`` environment variable.

        Raises:
            ValueError: If a credential is absent (or empty) in both places.
        """
        for key in ("appid", "api_key", "api_secret"):
            # An empty/None value is as unusable as a missing one.
            if key in self.credentials and self.credentials[key]:
                continue

            env_value = os.environ.get(f"XUNFEI_{key.upper()}")
            if not env_value:
                raise ValueError(
                    f"Missing required credential: {key}. "
                    f"Set XUNFEI_{key.upper()} environment variable or pass it to constructor."
                )
            self.credentials[key] = env_value

    def _create_auth_url(self) -> str:
        """
        Build the signed WebSocket URL.

        Returns:
            ``wss://tts-api.xfyun.cn/v2/tts`` with ``authorization``, ``date``
            and ``host`` query parameters.
        """
        # Function-scope import keeps this block self-contained.
        from email.utils import formatdate

        # The signed date must be real GMT in RFC 1123 form. strftime() on a
        # local-time datetime with a literal "GMT" suffix is wrong on any host
        # not running in UTC and also depends on the process locale.
        date = formatdate(usegmt=True)

        signature_origin = f"host: tts-api.xfyun.cn\ndate: {date}\nGET /v2/tts HTTP/1.1"
        signature_sha = hmac.new(
            self.credentials["api_secret"].encode('utf-8'),
            signature_origin.encode('utf-8'),
            digestmod=hashlib.sha256
        ).digest()
        signature = base64.b64encode(signature_sha).decode()
        authorization = base64.b64encode(
            f'api_key="{self.credentials["api_key"]}", algorithm="hmac-sha256", '
            f'headers="host date request-line", signature="{signature}"'.encode()
        ).decode()

        params = urlencode({
            "authorization": authorization,
            "date": date,
            "host": "tts-api.xfyun.cn"
        })
        return f'wss://tts-api.xfyun.cn/v2/tts?{params}'

    def synthesize(
        self,
        text: str,
        output_path: Path,
        voice: Optional[str] = None,
        speed: Optional[float] = None
    ) -> TTSResult:
        """
        Synthesize ``text`` into an MP3 file over the XunFei WebSocket API.

        Args:
            text: Text to synthesize.
            output_path: Destination file (.mp3); parent directories are created.
            voice: Voice ID (optional; defaults to ``get_voice("female")``).
            speed: Speaking speed (optional, 0.5-2.0, 1.0 = normal); a falsy
                value falls back to ``self.config.speed``.

        Returns:
            ``TTSResult`` with ``success=True`` and ``audio_path`` on success;
            otherwise ``success=False`` with an ``error_message``.
        """
        voice = voice or self.get_voice("female")
        speed_val = speed or self.config.speed
        # XunFei speed is an integer 0-100 with 50 = normal. Round (instead of
        # truncating float error) and clamp so an out-of-range speed cannot
        # produce an invalid request.
        speed_int = max(0, min(100, round(speed_val * 50)))

        audio_data = bytearray()
        error_msg = None

        def on_message(ws, message):
            nonlocal error_msg
            try:
                data = json.loads(message)
            except json.JSONDecodeError as e:
                error_msg = f"JSON decode error: {e}"
                ws.close()  # nothing useful can follow an unparsable frame
                return

            if data.get("code") != 0:
                error_msg = f"XunFei API error: code={data.get('code')}, message={data.get('message')}"
                ws.close()  # do not keep run_forever() blocked after a failure
                return

            payload = data.get("data", {})
            audio = payload.get("audio", "")
            if audio:
                audio_data.extend(base64.b64decode(audio))
            # status == 2 marks the final frame: close the connection.
            if payload.get("status", 0) == 2:
                ws.close()

        def on_error(ws, error):
            nonlocal error_msg
            error_msg = str(error)

        def on_open(ws):
            request = {
                "common": {"app_id": self.credentials["appid"]},
                "business": {
                    "aue": "lame",  # MP3 output
                    "sfl": 1,  # streamed response
                    "auf": "audio/L16;rate=16000",
                    "vcn": voice,  # voice name
                    "speed": speed_int,  # speaking speed
                    "volume": 50,  # volume
                    "pitch": 50,  # pitch
                },
                "data": {
                    "status": 2,  # whole text sent in one frame
                    "text": str(base64.b64encode(text.encode('utf-8')), "UTF8")
                }
            }
            ws.send(json.dumps(request))

        try:
            ws_url = self._create_auth_url()
            ws = websocket.WebSocketApp(
                ws_url,
                on_message=on_message,
                on_error=on_error,
            )
            ws.on_open = on_open
            # Verify the server certificate against the certifi CA bundle.
            ws.run_forever(
                sslopt={"cert_reqs": ssl.CERT_REQUIRED, "ca_certs": certifi.where()}
            )

            if error_msg:
                return TTSResult(success=False, error_message=error_msg)

            if not audio_data:
                return TTSResult(success=False, error_message="No audio data received")

            # Make sure the output directory exists.
            output_path = Path(output_path)
            output_path.parent.mkdir(parents=True, exist_ok=True)

            # Persist the collected audio.
            with open(output_path, "wb") as f:
                f.write(audio_data)

            return TTSResult(success=True, audio_path=output_path)

        except Exception as e:
            return TTSResult(success=False, error_message=str(e))
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
共享音频工具函数
|
|
4
|
+
|
|
5
|
+
提供音频处理相关的通用功能,避免代码重复。
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import re
|
|
9
|
+
import shutil
|
|
10
|
+
import subprocess
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Optional
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def get_ffmpeg_path() -> str:
    """
    Locate the ``ffmpeg`` executable on the current PATH.

    Returns:
        The path reported by ``shutil.which("ffmpeg")``.

    Raises:
        RuntimeError: If no ``ffmpeg`` executable is found.
    """
    located = shutil.which("ffmpeg")
    if located:
        return located

    raise RuntimeError(
        "ffmpeg not found. Install it with: brew install ffmpeg (macOS) "
        "or apt-get install ffmpeg (Ubuntu)"
    )
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def get_audio_duration(audio_path: Path, ffmpeg_path: Optional[str] = None) -> float:
    """
    Measure the duration of an audio file by parsing FFmpeg's output.

    Args:
        audio_path: Audio file to inspect.
        ffmpeg_path: FFmpeg executable to run; when ``None`` it is resolved
            with ``get_ffmpeg_path()``.

    Returns:
        Duration in seconds, or 0.0 when no "Duration:" line can be parsed
        from FFmpeg's stderr.
    """
    executable = get_ffmpeg_path() if ffmpeg_path is None else ffmpeg_path

    completed = subprocess.run(
        [executable, "-i", str(audio_path), "-hide_banner", "-f", "null", "-"],
        capture_output=True,
        text=True,
    )

    # FFmpeg reports e.g. "  Duration: 00:00:03.45, ..." on stderr.
    found = re.search(r"Duration: (\d+):(\d+):(\d+\.?\d*)", completed.stderr)
    if found is None:
        return 0.0

    hh, mm, ss = found.groups()
    return int(hh) * 3600 + int(mm) * 60 + float(ss)
|