@rookiestar/eng-lang-tutor 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/.claude/settings.local.json +22 -0
  2. package/.gitignore +32 -0
  3. package/CHANGELOG.md +37 -0
  4. package/CLAUDE.md +275 -0
  5. package/README.md +369 -0
  6. package/SKILL.md +613 -0
  7. package/bin/eng-lang-tutor.js +177 -0
  8. package/docs/OPENCLAW_DEPLOYMENT.md +241 -0
  9. package/examples/sample_keypoint_a1.json +112 -0
  10. package/examples/sample_keypoint_a2.json +124 -0
  11. package/examples/sample_keypoint_b1.json +135 -0
  12. package/examples/sample_keypoint_b2.json +137 -0
  13. package/examples/sample_keypoint_c1.json +134 -0
  14. package/examples/sample_keypoint_c2.json +141 -0
  15. package/examples/sample_quiz_a1.json +94 -0
  16. package/examples/sample_quiz_a2.json +94 -0
  17. package/examples/sample_quiz_b1.json +92 -0
  18. package/examples/sample_quiz_b2.json +94 -0
  19. package/examples/sample_quiz_c1.json +94 -0
  20. package/examples/sample_quiz_c2.json +104 -0
  21. package/package.json +41 -0
  22. package/references/resources.md +292 -0
  23. package/requirements.txt +16 -0
  24. package/scripts/__init__.py +28 -0
  25. package/scripts/audio/__init__.py +23 -0
  26. package/scripts/audio/composer.py +367 -0
  27. package/scripts/audio/converter.py +331 -0
  28. package/scripts/audio/feishu_voice.py +404 -0
  29. package/scripts/audio/tts/__init__.py +30 -0
  30. package/scripts/audio/tts/base.py +166 -0
  31. package/scripts/audio/tts/manager.py +306 -0
  32. package/scripts/audio/tts/providers/__init__.py +12 -0
  33. package/scripts/audio/tts/providers/edge.py +111 -0
  34. package/scripts/audio/tts/providers/xunfei.py +205 -0
  35. package/scripts/audio/utils.py +63 -0
  36. package/scripts/cli/__init__.py +7 -0
  37. package/scripts/cli/cli.py +229 -0
  38. package/scripts/cli/command_parser.py +336 -0
  39. package/scripts/core/__init__.py +30 -0
  40. package/scripts/core/constants.py +125 -0
  41. package/scripts/core/error_notebook.py +308 -0
  42. package/scripts/core/gamification.py +405 -0
  43. package/scripts/core/scorer.py +295 -0
  44. package/scripts/core/state_manager.py +814 -0
  45. package/scripts/eng-lang-tutor +16 -0
  46. package/scripts/scheduling/__init__.py +6 -0
  47. package/scripts/scheduling/cron_push.py +229 -0
  48. package/scripts/utils/__init__.py +12 -0
  49. package/scripts/utils/dedup.py +331 -0
  50. package/scripts/utils/helpers.py +82 -0
  51. package/templates/keypoint_schema.json +420 -0
  52. package/templates/prompt_templates.md +73 -0
  53. package/templates/prompts/display_guide.md +106 -0
  54. package/templates/prompts/initialization.md +350 -0
  55. package/templates/prompts/keypoint_generation.md +272 -0
  56. package/templates/prompts/output_rules.md +106 -0
  57. package/templates/prompts/quiz_generation.md +190 -0
  58. package/templates/prompts/responses.md +339 -0
  59. package/templates/prompts/shared_enums.md +252 -0
  60. package/templates/quiz_schema.json +214 -0
  61. package/templates/state_schema.json +277 -0
@@ -0,0 +1,306 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ TTS 管理器 - 通用入口,支持多 Provider
4
+
5
+ 提供统一的 TTS 接口,支持切换不同的 TTS 服务提供商。
6
+
7
+ 使用示例:
8
+ # 方式 1:从环境变量读取配置(默认使用 Edge-TTS)
9
+ manager = TTSManager.from_env()
10
+
11
+ # 方式 2:使用 Edge-TTS(默认,免费无需认证)
12
+ manager = TTSManager(provider="edge-tts")
13
+
14
+ # 方式 3:使用讯飞(需要认证)
15
+ manager = TTSManager(
16
+ provider="xunfei",
17
+ appid="xxx",
18
+ api_key="xxx",
19
+ api_secret="xxx"
20
+ )
21
+
22
+ # 合成单条语音
23
+ result = manager.synthesize("Hello", Path("output.mp3"))
24
+
25
+ # 为知识点生成所有音频
26
+ audio_info = manager.generate_keypoint_audio(keypoint)
27
+ """
28
+
29
+ from pathlib import Path
30
+ from typing import Dict, Any, Optional, Type, ClassVar
31
+ from datetime import date, datetime
32
+ import os
33
+ import sys
34
+
35
+ # 添加 scripts 目录到路径以导入 state_manager
36
+ sys.path.insert(0, str(Path(__file__).parent.parent.parent))
37
+
38
+ from .base import TTSProvider, TTSConfig, TTSResult
39
+ from .providers.xunfei import XunFeiProvider
40
+ from .providers.edge import EdgeTTSProvider
41
+
42
+ try:
43
+ from ...core.state_manager import get_default_state_dir
44
+ except ImportError:
45
+ from scripts.core.state_manager import get_default_state_dir
46
+
47
+
48
# Provider registry: maps the public provider name to its implementing class.
PROVIDERS: Dict[str, Type[TTSProvider]] = {
    "edge-tts": EdgeTTSProvider,  # default, recommended
    "xunfei": XunFeiProvider,  # alternative option
}
53
+
54
+
55
class TTSManager:
    """
    Unified entry point for text-to-speech synthesis.

    Wraps a single TTS provider instance and offers:
    - switching between the providers registered in ``PROVIDERS``
    - construction from environment variables
    - batch audio generation for a keypoint
    """

    # Names of all registered providers (snapshot taken when the class is created).
    SUPPORTED_PROVIDERS: ClassVar[list] = list(PROVIDERS.keys())

    # Translation table replacing characters that are path separators or are
    # otherwise unsafe in file names with "_" (used for speaker labels).
    _UNSAFE_FILENAME_CHARS: ClassVar[Dict[int, str]] = str.maketrans(
        {char: "_" for char in '/\\:*?"<>|\0'}
    )

    def __init__(
        self,
        provider: str = "edge-tts",
        data_dir: Optional[str] = None,
        config: Optional[TTSConfig] = None,
        **credentials
    ):
        """
        Initialise the TTS manager.

        Args:
            provider: Provider name; must be a key of ``PROVIDERS``
                ("edge-tts" by default, or "xunfei").
            data_dir: Data directory. When None, the directory returned by
                ``get_default_state_dir()`` is used.
            config: TTS configuration; a default ``TTSConfig()`` is created
                when omitted.
            **credentials: Provider credentials, forwarded unchanged to the
                provider constructor.

        Raises:
            ValueError: If ``provider`` is not registered.

        Examples:
            # Edge-TTS (default)
            manager = TTSManager()

            # XunFei, passing the keys directly
            manager = TTSManager(
                provider="xunfei",
                appid="xxx",
                api_key="xxx",
                api_secret="xxx"
            )
        """
        if provider not in PROVIDERS:
            raise ValueError(
                f"Unknown provider: {provider}. "
                f"Available: {list(PROVIDERS.keys())}"
            )

        # Same default-directory logic as the state manager module.
        if data_dir is None:
            self.data_dir = get_default_state_dir()
        else:
            self.data_dir = Path(data_dir)

        self.audio_dir = self.data_dir / "audio"
        self.audio_dir.mkdir(parents=True, exist_ok=True)

        self.provider_name = provider
        self.config = config or TTSConfig()

        # Instantiate the selected provider.
        self.provider: TTSProvider = PROVIDERS[provider](
            config=self.config,
            **credentials
        )

    @classmethod
    def from_env(cls, provider: Optional[str] = None, **kwargs) -> "TTSManager":
        """
        Create a manager, taking the provider name from the environment.

        The ``TTS_PROVIDER`` environment variable selects the provider and
        falls back to "edge-tts" when unset. Example environment:
            TTS_PROVIDER=xunfei
            XUNFEI_APPID=xxx
            XUNFEI_API_KEY=xxx
            XUNFEI_API_SECRET=xxx

        Args:
            provider: Provider name; when given it overrides ``TTS_PROVIDER``.
            **kwargs: Extra arguments forwarded to the constructor.

        Returns:
            A new TTSManager instance.
        """
        provider = provider or os.getenv("TTS_PROVIDER", "edge-tts")
        return cls(provider=provider, **kwargs)

    def switch_provider(self, provider: str, **credentials) -> None:
        """
        Replace the active provider.

        Args:
            provider: Name of the provider to switch to.
            **credentials: Credentials for the new provider.

        Raises:
            ValueError: If ``provider`` is not registered.
        """
        if provider not in PROVIDERS:
            raise ValueError(
                f"Unknown provider: {provider}. "
                f"Available: {list(PROVIDERS.keys())}"
            )

        # Build the new provider first: if its constructor raises, the manager
        # keeps its previous provider and provider_name in a consistent state.
        new_provider = PROVIDERS[provider](
            config=self.config,
            **credentials
        )
        self.provider = new_provider
        self.provider_name = provider

    def synthesize(
        self,
        text: str,
        output_path: Path,
        voice: Optional[str] = None,
        speed: Optional[float] = None
    ) -> TTSResult:
        """
        Synthesize a single utterance with the active provider.

        Args:
            text: Text to synthesize.
            output_path: Destination file path.
            voice: Voice ID (optional).
            speed: Speech speed (optional, 0.5-2.0).

        Returns:
            TTSResult: The provider's synthesis result.
        """
        return self.provider.synthesize(text, output_path, voice, speed)

    def generate_keypoint_audio(
        self,
        keypoint: Dict[str, Any],
        target_date: Optional[date] = None
    ) -> Dict[str, Any]:
        """
        Generate every audio clip for one keypoint.

        Produces:
        - dialogue audio, one file per line (speaker "A" uses the male voice,
          every other speaker the female voice)
        - expression audio at a slower speed, suited to learners

        Files are written to ``<audio_dir>/<YYYY-MM-DD>/``. A failed synthesis
        does not abort the run; the entry carries an "error" key instead of
        "audio_url".

        Args:
            keypoint: Keypoint data. Reads ``examples[*].dialogue`` (strings of
                the form "Speaker: text") and ``expressions[*].phrase``.
            target_date: Date used for the output sub-directory (defaults to
                today).

        Returns:
            Dict with "dialogue" and "expressions" lists plus "generated_at"
            and "provider".
        """
        target_date = target_date or date.today()
        date_str = target_date.strftime('%Y-%m-%d')

        date_audio_dir = self.audio_dir / date_str
        date_audio_dir.mkdir(parents=True, exist_ok=True)

        audio_info = {
            "dialogue": [],
            "expressions": [],
            "generated_at": datetime.now().isoformat(),
            "provider": self.provider_name
        }

        # 1. Dialogue audio.
        for i, example in enumerate(keypoint.get("examples", [])):
            for j, line in enumerate(example.get("dialogue", [])):
                # Lines without a "speaker: text" separator are skipped.
                if ":" not in line:
                    continue

                speaker, text = line.split(":", 1)
                speaker = speaker.strip()
                text = text.strip()

                if not text:
                    continue

                # A = male voice, anything else (normally B) = female voice.
                gender = "male" if speaker.upper() == "A" else "female"
                voice = self.provider.get_voice(gender)

                # The speaker label comes from content data; neutralise path
                # separators so the file always lands inside date_audio_dir.
                safe_speaker = speaker.translate(self._UNSAFE_FILENAME_CHARS)
                output_path = date_audio_dir / f"dialogue_{i}_{j}_{safe_speaker}.mp3"

                result = self.synthesize(
                    text=text,
                    output_path=output_path,
                    voice=voice
                )

                if result.success:
                    audio_info["dialogue"].append({
                        "speaker": speaker,
                        "text": text,
                        "audio_url": f"audio/{date_str}/{output_path.name}"
                    })
                else:
                    # Record the failure but keep going.
                    audio_info["dialogue"].append({
                        "speaker": speaker,
                        "text": text,
                        "error": result.error_message
                    })

        # 2. Expression audio (slower speed).
        for i, expr in enumerate(keypoint.get("expressions", [])):
            phrase = expr.get("phrase", "")
            if not phrase:
                continue

            output_path = date_audio_dir / f"expression_{i+1}.mp3"
            result = self.synthesize(
                text=phrase,
                output_path=output_path,
                speed=0.7  # slower, suited to learners
            )

            if result.success:
                audio_info["expressions"].append({
                    "text": phrase,
                    "audio_url": f"audio/{date_str}/{output_path.name}"
                })
            else:
                audio_info["expressions"].append({
                    "text": phrase,
                    "error": result.error_message
                })

        return audio_info

    @classmethod
    def list_supported_providers(cls) -> list:
        """
        List every registered provider.

        Returns:
            List of provider names.
        """
        return list(PROVIDERS.keys())

    def list_voices(self) -> Dict[str, str]:
        """
        List the voices offered by the active provider.

        Returns:
            Mapping of voice ID -> description.
        """
        return self.provider.list_voices()

    def get_audio_path(self, date_str: str, filename: str) -> Path:
        """
        Build the full path of an audio file.

        Args:
            date_str: Date string (YYYY-MM-DD).
            filename: File name.

        Returns:
            ``<audio_dir>/<date_str>/<filename>``.
        """
        return self.audio_dir / date_str / filename
@@ -0,0 +1,12 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ TTS Providers - TTS 服务提供者实现
4
+ """
5
+
6
+ from .xunfei import XunFeiProvider
7
+ from .edge import EdgeTTSProvider
8
+
9
+ __all__ = [
10
+ "XunFeiProvider",
11
+ "EdgeTTSProvider",
12
+ ]
@@ -0,0 +1,111 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Edge-TTS Provider 实现
4
+
5
+ Microsoft Edge TTS 服务:
6
+ - 完全免费,无需 API 密钥
7
+ - 高质量 24kHz 神经语音
8
+ - 支持多种美式英语发音人
9
+ - 国内网络可能需要代理
10
+ """
11
+
12
+ import asyncio
13
+ import edge_tts
14
+ from pathlib import Path
15
+ from typing import Optional, ClassVar, Dict
16
+
17
+ from ..base import TTSProvider, TTSConfig, TTSResult
18
+
19
+
20
class EdgeTTSProvider(TTSProvider):
    """
    Microsoft Edge TTS provider.

    Commonly used US-English voices (SUPPORTED_VOICES holds the full list):
    - en-US-JennyNeural: female, friendly (recommended)
    - en-US-AriaNeural: female, confident and clear
    - en-US-EricNeural: male, professional (recommended)
    - en-US-GuyNeural: male, energetic
    - en-US-AnaNeural: female, cute and easy-going
    - en-US-ChristopherNeural: male, authoritative

    No credentials are required.
    """

    PROVIDER_NAME: ClassVar[str] = "edge-tts"
    DEFAULT_FEMALE_VOICE: ClassVar[str] = "en-US-JennyNeural"  # friendly
    DEFAULT_MALE_VOICE: ClassVar[str] = "en-US-EricNeural"  # professional
    # Role-to-voice mapping: narrator = female, dialogue A = male, dialogue B = female.
    DEFAULT_NARRATOR_VOICE: ClassVar[str] = "en-US-JennyNeural"  # narrator - female
    DEFAULT_DIALOGUE_A_VOICE: ClassVar[str] = "en-US-EricNeural"  # dialogue A - male
    DEFAULT_DIALOGUE_B_VOICE: ClassVar[str] = "en-US-JennyNeural"  # dialogue B - female

    SUPPORTED_VOICES: ClassVar[Dict[str, str]] = {
        "en-US-JennyNeural": "美式英语女声,友好亲切(推荐)",
        "en-US-AriaNeural": "美式英语女声,自信清晰",
        "en-US-EricNeural": "美式英语男声,专业理性(推荐)",
        "en-US-GuyNeural": "美式英语男声,热情活力",
        "en-US-AnaNeural": "美式英语女声,可爱随和",
        "en-US-ChristopherNeural": "美式英语男声,权威可靠",
        "en-US-MichelleNeural": "美式英语女声,友好舒适",
        "en-US-RogerNeural": "美式英语男声,生动活泼",
        "en-US-AndrewNeural": "美式英语男声,友好积极",
        "en-US-BrianNeural": "美式英语男声,友好积极",
        "en-US-EmmaNeural": "美式英语女声,友好积极",
        "en-US-AvaNeural": "美式英语女声,友好积极",
    }

    def _validate_credentials(self) -> None:
        """
        Validate credentials.

        Edge-TTS needs no credentials, so this always succeeds.
        """
        # Nothing to validate.
        pass

    def synthesize(
        self,
        text: str,
        output_path: Path,
        voice: Optional[str] = None,
        speed: Optional[float] = None
    ) -> TTSResult:
        """
        Synthesize speech and save it to ``output_path``.

        Args:
            text: Text to synthesize.
            output_path: Destination file path (.mp3); parent directories are
                created if missing.
            voice: Voice ID (optional, defaults to the female voice).
            speed: Speech speed (optional, 0.5-2.0, 1.0 = normal); falls back
                to ``self.config.speed`` when omitted.

        Returns:
            TTSResult: ``success=True`` with ``audio_path`` on success,
            otherwise ``success=False`` with the exception text in
            ``error_message``. Exceptions are not propagated.
        """
        voice = voice or self.get_voice("female")
        speed_val = speed or self.config.speed

        # Convert speed (0.5-2.0) to the edge-tts percentage rate string:
        #   speed=1.0 -> "+0%"
        #   speed=0.7 -> "-30%" (slower, suited to learners)
        #   speed=1.5 -> "+50%" (faster)
        # round() rather than int(): binary float error would otherwise
        # truncate e.g. (0.9 - 1.0) * 100 = -9.999... to -9 instead of -10.
        rate_percent = round((speed_val - 1.0) * 100)
        rate = f"{rate_percent:+d}%"

        # Make sure the output directory exists.
        output_path = Path(output_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)

        async def _synthesize_async():
            """Run the edge-tts request and write the audio file."""
            communicate = edge_tts.Communicate(text, voice, rate=rate)
            await communicate.save(str(output_path))

        try:
            # Drive the coroutine from this synchronous context.
            asyncio.run(_synthesize_async())
            return TTSResult(success=True, audio_path=output_path)

        except Exception as e:
            # Best-effort API: failures are reported through the result object.
            return TTSResult(success=False, error_message=str(e))
@@ -0,0 +1,205 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ 讯飞 TTS Provider 实现
4
+
5
+ 讯飞开放平台语音合成服务:
6
+ - 每日 500 分钟免费额度
7
+ - 国内网络稳定直连
8
+ - 支持美式英语发音人(catherine, henry)
9
+ - WebSocket 流式接口
10
+ """
11
+
12
+ import websocket
13
+ import datetime
14
+ import hashlib
15
+ import base64
16
+ import hmac
17
+ import json
18
+ import os
19
+ import ssl
20
+ import certifi
21
+ from pathlib import Path
22
+ from typing import Optional, ClassVar, Dict
23
+ from urllib.parse import urlencode
24
+
25
+ from ..base import TTSProvider, TTSConfig, TTSResult
26
+
27
+
28
class XunFeiProvider(TTSProvider):
    """
    XunFei (iFLYTEK) open-platform TTS provider.

    Supported US-English voices:
    - catherine: female, natural and fluent (recommended)
    - henry: male, calm and professional
    - mary: female, news-reader style
    - john: male, lively

    Environment variables (used when a credential is not passed in):
    - XUNFEI_APPID: application ID
    - XUNFEI_API_KEY: API key
    - XUNFEI_API_SECRET: API secret
    """

    PROVIDER_NAME: ClassVar[str] = "xunfei"
    DEFAULT_FEMALE_VOICE: ClassVar[str] = "catherine"  # US-English female
    DEFAULT_MALE_VOICE: ClassVar[str] = "henry"  # US-English male
    # Role-to-voice mapping: narrator = female, dialogue A = male, dialogue B = female.
    DEFAULT_NARRATOR_VOICE: ClassVar[str] = "catherine"  # narrator - female
    DEFAULT_DIALOGUE_A_VOICE: ClassVar[str] = "henry"  # dialogue A - male
    DEFAULT_DIALOGUE_B_VOICE: ClassVar[str] = "catherine"  # dialogue B - female

    SUPPORTED_VOICES: ClassVar[Dict[str, str]] = {
        "catherine": "美式英语女声,自然流畅(推荐)",
        "henry": "美式英语男声,沉稳专业",
        "mary": "美式英语女声,新闻播报",
        "john": "美式英语男声,活力阳光",
    }

    def _validate_credentials(self) -> None:
        """
        Validate the XunFei credentials.

        Credentials passed to the constructor take priority; a missing or
        empty one is filled in from the matching ``XUNFEI_*`` environment
        variable.

        Raises:
            ValueError: If a required credential cannot be found.
        """
        required = ["appid", "api_key", "api_secret"]
        for key in required:
            # Treat None / "" like a missing credential instead of letting it
            # fail later during signing.
            if self.credentials.get(key):
                continue

            env_key = f"XUNFEI_{key.upper()}"
            env_value = os.environ.get(env_key)
            if not env_value:
                raise ValueError(
                    f"Missing required credential: {key}. "
                    f"Set XUNFEI_{key.upper()} environment variable or pass it to constructor."
                )
            self.credentials[key] = env_value

    def _create_auth_url(self) -> str:
        """
        Build the signed WebSocket URL.

        Returns:
            WebSocket URL carrying the authorization, date and host parameters.
        """
        from email.utils import formatdate

        # RFC 1123 date in real GMT. The previous local-time strftime() was
        # labelled "GMT" but was only correct on machines running in UTC, and
        # its %a/%b names depended on the process locale.
        date = formatdate(timeval=None, localtime=False, usegmt=True)
        signature_origin = f"host: tts-api.xfyun.cn\ndate: {date}\nGET /v2/tts HTTP/1.1"
        signature_sha = hmac.new(
            self.credentials["api_secret"].encode('utf-8'),
            signature_origin.encode('utf-8'),
            digestmod=hashlib.sha256
        ).digest()
        signature = base64.b64encode(signature_sha).decode()
        authorization = base64.b64encode(
            f'api_key="{self.credentials["api_key"]}", algorithm="hmac-sha256", '
            f'headers="host date request-line", signature="{signature}"'.encode()
        ).decode()

        params = urlencode({
            "authorization": authorization,
            "date": date,
            "host": "tts-api.xfyun.cn"
        })
        return f'wss://tts-api.xfyun.cn/v2/tts?{params}'

    def synthesize(
        self,
        text: str,
        output_path: Path,
        voice: Optional[str] = None,
        speed: Optional[float] = None
    ) -> TTSResult:
        """
        Synthesize speech over the XunFei WebSocket API and save it.

        Args:
            text: Text to synthesize.
            output_path: Destination file path (.mp3); parent directories are
                created if missing.
            voice: Voice ID (optional, defaults to the female voice).
            speed: Speech speed (optional, 0.5-2.0, 1.0 = normal); falls back
                to ``self.config.speed`` when omitted.

        Returns:
            TTSResult: ``success=True`` with ``audio_path`` on success,
            otherwise ``success=False`` with ``error_message``. Exceptions are
            not propagated.
        """
        voice = voice or self.get_voice("female")
        # XunFei speed range is 0-100 with 50 = normal. Round (float error
        # would otherwise truncate) and clamp so out-of-range inputs cannot
        # produce an invalid request.
        speed_val = speed or self.config.speed
        speed_int = max(0, min(100, round(speed_val * 50)))

        audio_data = bytearray()
        error_msg = None

        def on_message(ws, message):
            nonlocal error_msg
            try:
                data = json.loads(message)
            except json.JSONDecodeError as e:
                error_msg = f"JSON decode error: {e}"
                # Stop the receive loop instead of waiting for the server.
                ws.close()
                return

            if data.get("code") == 0:
                audio = data.get("data", {}).get("audio", "")
                status = data.get("data", {}).get("status", 0)
                if audio:
                    audio_data.extend(base64.b64decode(audio))
                # status=2 marks the final frame: close the connection.
                if status == 2:
                    ws.close()
            else:
                error_msg = f"XunFei API error: code={data.get('code')}, message={data.get('message')}"
                # No audio will follow an error frame; close so run_forever()
                # returns promptly.
                ws.close()

        def on_error(ws, error):
            nonlocal error_msg
            error_msg = str(error)

        def on_open(ws):
            request = {
                "common": {"app_id": self.credentials["appid"]},
                "business": {
                    "aue": "lame",  # MP3 output
                    "sfl": 1,  # streamed return
                    "auf": "audio/L16;rate=16000",
                    "vcn": voice,  # voice name
                    "speed": speed_int,  # speed
                    "volume": 50,  # volume
                    "pitch": 50,  # pitch
                },
                "data": {
                    "status": 2,  # whole text sent in a single frame
                    "text": str(base64.b64encode(text.encode('utf-8')), "UTF8")
                }
            }
            ws.send(json.dumps(request))

        try:
            ws_url = self._create_auth_url()
            ws = websocket.WebSocketApp(
                ws_url,
                on_message=on_message,
                on_error=on_error,
            )
            ws.on_open = on_open
            # Verify TLS against the certifi CA bundle.
            ws.run_forever(
                sslopt={"cert_reqs": ssl.CERT_REQUIRED, "ca_certs": certifi.where()}
            )

            if error_msg:
                return TTSResult(success=False, error_message=error_msg)

            if not audio_data:
                return TTSResult(success=False, error_message="No audio data received")

            # Make sure the output directory exists.
            output_path = Path(output_path)
            output_path.parent.mkdir(parents=True, exist_ok=True)

            # Write the collected audio.
            with open(output_path, "wb") as f:
                f.write(audio_data)

            return TTSResult(success=True, audio_path=output_path)

        except Exception as e:
            # Best-effort API: failures are reported through the result object.
            return TTSResult(success=False, error_message=str(e))
@@ -0,0 +1,63 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ 共享音频工具函数
4
+
5
+ 提供音频处理相关的通用功能,避免代码重复。
6
+ """
7
+
8
+ import re
9
+ import shutil
10
+ import subprocess
11
+ from pathlib import Path
12
+ from typing import Optional
13
+
14
+
15
def get_ffmpeg_path() -> str:
    """
    Locate the FFmpeg executable on the current PATH.

    Returns:
        Path of the ffmpeg executable.

    Raises:
        RuntimeError: If ffmpeg cannot be found.
    """
    located = shutil.which("ffmpeg")
    if located:
        return located

    raise RuntimeError(
        "ffmpeg not found. Install it with: brew install ffmpeg (macOS) "
        "or apt-get install ffmpeg (Ubuntu)"
    )
32
+
33
+
34
def get_audio_duration(audio_path: Path, ffmpeg_path: Optional[str] = None) -> float:
    """
    Return the duration of an audio file, as reported by FFmpeg.

    Args:
        audio_path: Path of the audio file.
        ffmpeg_path: FFmpeg executable to run (optional; located with
            ``get_ffmpeg_path()`` when omitted).

    Returns:
        Duration in seconds, or 0.0 when no "Duration:" line can be parsed
        from FFmpeg's output.

    Raises:
        RuntimeError: If ``ffmpeg_path`` is omitted and ffmpeg is not found.
    """
    if ffmpeg_path is None:
        ffmpeg_path = get_ffmpeg_path()

    cmd = [
        ffmpeg_path,
        "-i", str(audio_path),
        "-hide_banner",
        "-f", "null",
        "-"
    ]

    # errors="replace": FFmpeg echoes file metadata to stderr, and tags in a
    # legacy encoding would otherwise raise UnicodeDecodeError before the
    # duration is parsed.
    result = subprocess.run(cmd, capture_output=True, text=True, errors="replace")

    # Parse the duration from stderr, e.g. "  Duration: 00:00:03.45, ..."
    match = re.search(r"Duration: (\d+):(\d+):(\d+\.?\d*)", result.stderr)
    if match:
        hours, minutes, seconds = match.groups()
        return int(hours) * 3600 + int(minutes) * 60 + float(seconds)
    return 0.0
@@ -0,0 +1,7 @@
1
+ #!/usr/bin/env python3
2
+ """CLI tools: command-line interface and command parsing."""
3
+
4
+ from .cli import main
5
+ from .command_parser import CommandParser
6
+
7
+ __all__ = ['main', 'CommandParser']