@rookiestar/eng-lang-tutor 1.0.3 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of @rookiestar/eng-lang-tutor might be problematic. Click here for more details.

package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@rookiestar/eng-lang-tutor",
3
- "version": "1.0.3",
3
+ "version": "1.0.4",
4
4
  "description": "English language tutor skill for OpenClaw - Learn authentic American English expressions with gamification",
5
5
  "keywords": [
6
6
  "english",
package/requirements.txt CHANGED
@@ -1,5 +1,7 @@
1
1
  # Core dependencies
2
2
  websocket-client>=1.6.0 # For XunFei TTS WebSocket API
3
+ certifi>=2024.0.0 # SSL certificate bundle for HTTPS/WebSocket connections
4
+ aiohttp>=3.8.0 # Async HTTP client for Feishu API
3
5
 
4
6
  # Optional: Edge-TTS support (uncomment if needed)
5
7
  # edge-tts>=6.1.0
@@ -0,0 +1,389 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ 知识点音频合成器 - 将多个音频片段合并为单个文件
4
+
5
+ 音频结构:
6
+ - expressions: 引导语 [1s] 内容 [2s]
7
+ - alternatives: 引导语 [1s] 内容 [2s]
8
+ - dialogues: 引导语 [1s] 对话行1 [0.5s] 对话行2 ...
9
+
10
+ 使用示例:
11
+ from scripts.audio_composer import AudioComposer
12
+ from scripts.tts import TTSManager
13
+
14
+ tts = TTSManager.from_env()
15
+ composer = AudioComposer(tts)
16
+
17
+ result = composer.compose_keypoint_audio(keypoint, Path("output.mp3"))
18
+ """
19
+
20
+ import subprocess
21
+ import tempfile
22
+ import shutil
23
+ from pathlib import Path
24
+ from typing import Optional, List
25
+ from dataclasses import dataclass
26
+
27
+ try:
28
+ from .tts import TTSManager
29
+ except ImportError:
30
+ from tts import TTSManager
31
+
32
+
33
+ @dataclass
34
+ class CompositionResult:
35
+ """音频合成结果"""
36
+ success: bool
37
+ audio_path: Optional[Path] = None
38
+ duration_seconds: Optional[float] = None
39
+ error_message: Optional[str] = None
40
+
41
+
42
+ class AudioComposer:
43
+ """
44
+ 知识点音频合成器
45
+
46
+ 将 expressions + alternatives + dialogues 合并为单个音频文件
47
+ """
48
+
49
+ def __init__(
50
+ self,
51
+ tts_manager: TTSManager,
52
+ ffmpeg_path: Optional[str] = None
53
+ ):
54
+ """
55
+ 初始化音频合成器
56
+
57
+ Args:
58
+ tts_manager: TTS 管理器实例
59
+ ffmpeg_path: ffmpeg 可执行文件路径(默认自动检测)
60
+ """
61
+ self.tts = tts_manager
62
+ self.ffmpeg_path = ffmpeg_path or shutil.which("ffmpeg")
63
+ if not self.ffmpeg_path:
64
+ raise RuntimeError(
65
+ "ffmpeg not found. Install it with: brew install ffmpeg (macOS) "
66
+ "or apt-get install ffmpeg (Ubuntu)"
67
+ )
68
+
69
+ # 创建临时目录用于存放中间文件
70
+ self.temp_dir = Path(tempfile.mkdtemp(prefix="audio_composer_"))
71
+
72
+ def __del__(self):
73
+ """清理临时目录"""
74
+ if self.temp_dir.exists():
75
+ shutil.rmtree(self.temp_dir, ignore_errors=True)
76
+
77
+ def compose_keypoint_audio(
78
+ self,
79
+ keypoint: dict,
80
+ output_path: Path,
81
+ lead_in_silence: float = 1.0, # 引导语后留白
82
+ section_silence: float = 2.0, # 内容后留白(段落间隔)
83
+ dialogue_silence: float = 0.5, # 对话行之间留白
84
+ narrator_voice: str = "henry", # 旁白音色(男声)
85
+ voice_a: str = "mary", # 对话 A 音色(女声)
86
+ voice_b: str = "henry", # 对话 B 音色(男声,沉稳)
87
+ speed: float = 0.9 # 语速
88
+ ) -> CompositionResult:
89
+ """
90
+ 合成知识点音频
91
+
92
+ Args:
93
+ keypoint: 知识点数据
94
+ output_path: 输出文件路径
95
+ lead_in_silence: 引导语后留白时长(秒)
96
+ section_silence: 内容后留白时长(秒)
97
+ dialogue_silence: 对话行之间留白时长(秒)
98
+ narrator_voice: 旁白音色
99
+ voice_a: 对话 A 角色音色
100
+ voice_b: 对话 B 角色音色
101
+ speed: 语速
102
+
103
+ Returns:
104
+ CompositionResult: 合成结果
105
+ """
106
+ try:
107
+ output_path = Path(output_path)
108
+ output_path.parent.mkdir(parents=True, exist_ok=True)
109
+
110
+ segments: List[Path] = []
111
+ segment_index = 0
112
+
113
+ # 1. Expressions 部分
114
+ expressions = keypoint.get("expressions", [])
115
+ if expressions:
116
+ # 引导语
117
+ lead_in = self._synthesize_segment(
118
+ text="Key expressions",
119
+ voice=narrator_voice,
120
+ speed=speed,
121
+ index=segment_index
122
+ )
123
+ segments.append(lead_in)
124
+ segment_index += 1
125
+
126
+ # 引导语后留白
127
+ silence_1s = self._generate_silence(lead_in_silence)
128
+ segments.append(silence_1s)
129
+
130
+ # 内容
131
+ phrases = [expr.get("phrase", "") for expr in expressions]
132
+ content_text = ". ".join(p for p in phrases if p)
133
+ if content_text:
134
+ content = self._synthesize_segment(
135
+ text=content_text,
136
+ voice=narrator_voice,
137
+ speed=speed,
138
+ index=segment_index
139
+ )
140
+ segments.append(content)
141
+ segment_index += 1
142
+
143
+ # 内容后留白
144
+ silence_2s = self._generate_silence(section_silence)
145
+ segments.append(silence_2s)
146
+
147
+ # 2. Alternatives 部分
148
+ alternatives = keypoint.get("alternatives", [])
149
+ if alternatives:
150
+ # 引导语
151
+ lead_in = self._synthesize_segment(
152
+ text="You can also say",
153
+ voice=narrator_voice,
154
+ speed=speed,
155
+ index=segment_index
156
+ )
157
+ segments.append(lead_in)
158
+ segment_index += 1
159
+
160
+ # 引导语后留白
161
+ silence_1s = self._generate_silence(lead_in_silence)
162
+ segments.append(silence_1s)
163
+
164
+ # 内容
165
+ content_text = ". ".join(alt for alt in alternatives if alt)
166
+ if content_text:
167
+ content = self._synthesize_segment(
168
+ text=content_text,
169
+ voice=narrator_voice,
170
+ speed=speed,
171
+ index=segment_index
172
+ )
173
+ segments.append(content)
174
+ segment_index += 1
175
+
176
+ # 内容后留白
177
+ silence_2s = self._generate_silence(section_silence)
178
+ segments.append(silence_2s)
179
+
180
+ # 3. Dialogues 部分
181
+ examples = keypoint.get("examples", [])
182
+ if examples:
183
+ # 引导语
184
+ lead_in = self._synthesize_segment(
185
+ text="Dialogue",
186
+ voice=narrator_voice,
187
+ speed=speed,
188
+ index=segment_index
189
+ )
190
+ segments.append(lead_in)
191
+ segment_index += 1
192
+
193
+ # 引导语后留白
194
+ silence_1s = self._generate_silence(lead_in_silence)
195
+ segments.append(silence_1s)
196
+
197
+ # 对话内容
198
+ silence_05s = self._generate_silence(dialogue_silence)
199
+ for example in examples:
200
+ dialogue = example.get("dialogue", [])
201
+ for line in dialogue:
202
+ if ":" in line:
203
+ speaker, text = line.split(":", 1)
204
+ speaker = speaker.strip()
205
+ text = text.strip()
206
+
207
+ if not text:
208
+ continue
209
+
210
+ # Speaker "A" uses voice_a (default "mary"), any other speaker uses voice_b (default "henry")
211
+ voice = voice_a if speaker.upper() == "A" else voice_b
212
+
213
+ segment = self._synthesize_segment(
214
+ text=text,
215
+ voice=voice,
216
+ speed=speed,
217
+ index=segment_index
218
+ )
219
+ segments.append(segment)
220
+ segment_index += 1
221
+
222
+ # 对话行之间留白
223
+ segments.append(silence_05s)
224
+
225
+ if not segments:
226
+ return CompositionResult(
227
+ success=False,
228
+ error_message="No audio content to compose"
229
+ )
230
+
231
+ # 4. 拼接所有片段
232
+ final_audio = self._concatenate_segments(segments, output_path)
233
+
234
+ # 5. 获取时长
235
+ duration = self._get_duration(final_audio)
236
+
237
+ return CompositionResult(
238
+ success=True,
239
+ audio_path=final_audio,
240
+ duration_seconds=duration
241
+ )
242
+
243
+ except Exception as e:
244
+ return CompositionResult(
245
+ success=False,
246
+ error_message=str(e)
247
+ )
248
+
249
+ def _synthesize_segment(
250
+ self,
251
+ text: str,
252
+ voice: str,
253
+ speed: float,
254
+ index: int
255
+ ) -> Path:
256
+ """
257
+ 合成单个音频片段
258
+
259
+ Args:
260
+ text: 文本
261
+ voice: 音色
262
+ speed: 语速
263
+ index: 片段索引
264
+
265
+ Returns:
266
+ 音频文件路径
267
+ """
268
+ output_path = self.temp_dir / f"segment_{index}.mp3"
269
+
270
+ result = self.tts.synthesize(
271
+ text=text,
272
+ output_path=output_path,
273
+ voice=voice,
274
+ speed=speed
275
+ )
276
+
277
+ if not result.success:
278
+ raise RuntimeError(f"TTS synthesis failed: {result.error_message}")
279
+
280
+ return output_path
281
+
282
+ def _generate_silence(self, duration: float) -> Path:
283
+ """
284
+ 生成空白音频
285
+
286
+ Args:
287
+ duration: 时长(秒)
288
+
289
+ Returns:
290
+ 空白音频文件路径
291
+ """
292
+ output_path = self.temp_dir / f"silence_{duration}.mp3"
293
+
294
+ if output_path.exists():
295
+ return output_path
296
+
297
+ cmd = [
298
+ self.ffmpeg_path,
299
+ "-f", "lavfi",
300
+ "-i", f"anullsrc=r=16000:cl=mono",
301
+ "-t", str(duration),
302
+ "-y",
303
+ str(output_path)
304
+ ]
305
+
306
+ result = subprocess.run(
307
+ cmd,
308
+ capture_output=True,
309
+ text=True,
310
+ timeout=30
311
+ )
312
+
313
+ if result.returncode != 0:
314
+ raise RuntimeError(f"Failed to generate silence: {result.stderr}")
315
+
316
+ return output_path
317
+
318
+ def _concatenate_segments(
319
+ self,
320
+ segments: List[Path],
321
+ output_path: Path
322
+ ) -> Path:
323
+ """
324
+ 拼接多个音频片段
325
+
326
+ Args:
327
+ segments: 音频片段路径列表
328
+ output_path: 输出文件路径
329
+
330
+ Returns:
331
+ 拼接后的音频文件路径
332
+ """
333
+ # 创建文件列表
334
+ list_file = self.temp_dir / "concat_list.txt"
335
+ with open(list_file, "w") as f:
336
+ for seg in segments:
337
+ # 需要转义路径中的特殊字符
338
+ escaped_path = str(seg).replace("'", "'\\''")
339
+ f.write(f"file '{escaped_path}'\n")
340
+
341
+ cmd = [
342
+ self.ffmpeg_path,
343
+ "-f", "concat",
344
+ "-safe", "0",
345
+ "-i", str(list_file),
346
+ "-c", "copy",
347
+ "-y",
348
+ str(output_path)
349
+ ]
350
+
351
+ result = subprocess.run(
352
+ cmd,
353
+ capture_output=True,
354
+ text=True,
355
+ timeout=120
356
+ )
357
+
358
+ if result.returncode != 0:
359
+ raise RuntimeError(f"Failed to concatenate audio: {result.stderr}")
360
+
361
+ return output_path
362
+
363
+ def _get_duration(self, audio_path: Path) -> float:
364
+ """
365
+ 获取音频时长
366
+
367
+ Args:
368
+ audio_path: 音频文件路径
369
+
370
+ Returns:
371
+ 时长(秒)
372
+ """
373
+ cmd = [
374
+ self.ffmpeg_path,
375
+ "-i", str(audio_path),
376
+ "-hide_banner",
377
+ "-f", "null",
378
+ "-"
379
+ ]
380
+
381
+ result = subprocess.run(cmd, capture_output=True, text=True)
382
+
383
+ # 从 stderr 中解析时长,格式: " Duration: 00:00:03.45, ..."
384
+ import re
385
+ match = re.search(r"Duration: (\d+):(\d+):(\d+\.?\d*)", result.stderr)
386
+ if match:
387
+ hours, minutes, seconds = match.groups()
388
+ return int(hours) * 3600 + int(minutes) * 60 + float(seconds)
389
+ return 0.0
@@ -0,0 +1,245 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ 音频格式转换器 - 将 MP3 转换为飞书语音格式
4
+
5
+ 飞书语音消息要求:
6
+ - 格式: Opus / Speex / AAC / AMR
7
+ - 采样率: 8000Hz / 16000Hz
8
+ - 声道: 单声道
9
+ """
10
+
11
+ import subprocess
12
+ import shutil
13
+ from pathlib import Path
14
+ from typing import Optional
15
+ from dataclasses import dataclass
16
+
17
+
18
+ @dataclass
19
+ class ConversionResult:
20
+ """转换结果"""
21
+ success: bool
22
+ output_path: Optional[Path] = None
23
+ error_message: Optional[str] = None
24
+ duration_seconds: Optional[float] = None
25
+
26
+
27
+ class AudioConverter:
28
+ """音频格式转换器"""
29
+
30
+ # 飞书支持的语音格式
31
+ SUPPORTED_FORMATS = ["opus", "speex", "aac", "amr"]
32
+ SUPPORTED_SAMPLE_RATES = [8000, 16000]
33
+
34
+ def __init__(self, ffmpeg_path: Optional[str] = None):
35
+ """
36
+ 初始化转换器
37
+
38
+ Args:
39
+ ffmpeg_path: ffmpeg 可执行文件路径(默认自动检测)
40
+ """
41
+ self.ffmpeg_path = ffmpeg_path or shutil.which("ffmpeg")
42
+ if not self.ffmpeg_path:
43
+ raise RuntimeError(
44
+ "ffmpeg not found. Install it with: brew install ffmpeg (macOS) "
45
+ "or apt-get install ffmpeg (Ubuntu)"
46
+ )
47
+
48
+ def convert_to_voice(
49
+ self,
50
+ input_path: Path,
51
+ output_path: Optional[Path] = None,
52
+ format: str = "opus",
53
+ sample_rate: int = 16000,
54
+ bitrate: str = "24k"
55
+ ) -> ConversionResult:
56
+ """
57
+ 将音频文件转换为飞书语音格式
58
+
59
+ Args:
60
+ input_path: 输入文件路径(支持 MP3, WAV, M4A 等)
61
+ output_path: 输出文件路径(可选,默认同目录更换扩展名)
62
+ format: 输出格式(opus, speex, aac, amr)
63
+ sample_rate: 采样率(8000 或 16000)
64
+ bitrate: 比特率(默认 24k,适合语音)
65
+
66
+ Returns:
67
+ ConversionResult: 转换结果
68
+ """
69
+ # 参数验证
70
+ if format not in self.SUPPORTED_FORMATS:
71
+ return ConversionResult(
72
+ success=False,
73
+ error_message=f"Unsupported format: {format}. Supported: {self.SUPPORTED_FORMATS}"
74
+ )
75
+
76
+ if sample_rate not in self.SUPPORTED_SAMPLE_RATES:
77
+ return ConversionResult(
78
+ success=False,
79
+ error_message=f"Unsupported sample rate: {sample_rate}. Supported: {self.SUPPORTED_SAMPLE_RATES}"
80
+ )
81
+
82
+ input_path = Path(input_path)
83
+ if not input_path.exists():
84
+ return ConversionResult(
85
+ success=False,
86
+ error_message=f"Input file not found: {input_path}"
87
+ )
88
+
89
+ # 确定输出路径
90
+ if output_path is None:
91
+ output_path = input_path.with_suffix(f".{format}")
92
+ else:
93
+ output_path = Path(output_path)
94
+
95
+ # 确保输出目录存在
96
+ output_path.parent.mkdir(parents=True, exist_ok=True)
97
+
98
+ # 构建 ffmpeg 命令
99
+ codec_map = {
100
+ "opus": "libopus",
101
+ "speex": "libspeex",
102
+ "aac": "aac",
103
+ "amr": "libvo_amrwbenc"
104
+ }
105
+
106
+ cmd = [
107
+ self.ffmpeg_path,
108
+ "-i", str(input_path), # 输入文件
109
+ "-acodec", codec_map[format], # 编码器
110
+ "-ar", str(sample_rate), # 采样率
111
+ "-ac", "1", # 单声道
112
+ "-ab", bitrate, # 比特率
113
+ "-y", # 覆盖输出文件
114
+ str(output_path)
115
+ ]
116
+
117
+ # 特定格式优化
118
+ if format == "opus":
119
+ # Opus 针对语音优化
120
+ cmd.extend(["-application", "audio"])
121
+ elif format == "speex":
122
+ # Speex 针对语音优化
123
+ cmd.extend(["-compression_level", "10"])
124
+
125
+ try:
126
+ result = subprocess.run(
127
+ cmd,
128
+ capture_output=True,
129
+ text=True,
130
+ timeout=60 # 60秒超时
131
+ )
132
+
133
+ if result.returncode != 0:
134
+ return ConversionResult(
135
+ success=False,
136
+ error_message=f"ffmpeg error: {result.stderr}"
137
+ )
138
+
139
+ # 获取音频时长
140
+ duration = self._get_duration(output_path)
141
+
142
+ return ConversionResult(
143
+ success=True,
144
+ output_path=output_path,
145
+ duration_seconds=duration
146
+ )
147
+
148
+ except subprocess.TimeoutExpired:
149
+ return ConversionResult(
150
+ success=False,
151
+ error_message="Conversion timeout (>60s)"
152
+ )
153
+ except Exception as e:
154
+ return ConversionResult(
155
+ success=False,
156
+ error_message=str(e)
157
+ )
158
+
159
+ def _get_duration(self, audio_path: Path) -> float:
160
+ """获取音频时长(秒)"""
161
+ cmd = [
162
+ self.ffmpeg_path,
163
+ "-i", str(audio_path),
164
+ "-hide_banner",
165
+ "-f", "null",
166
+ "-"
167
+ ]
168
+
169
+ result = subprocess.run(cmd, capture_output=True, text=True)
170
+
171
+ # 从 stderr 中解析时长,格式: " Duration: 00:00:03.45, ..."
172
+ import re
173
+ match = re.search(r"Duration: (\d+):(\d+):(\d+\.?\d*)", result.stderr)
174
+ if match:
175
+ hours, minutes, seconds = match.groups()
176
+ return int(hours) * 3600 + int(minutes) * 60 + float(seconds)
177
+ return 0.0
178
+
179
+ def batch_convert(
180
+ self,
181
+ input_dir: Path,
182
+ output_dir: Optional[Path] = None,
183
+ format: str = "opus",
184
+ sample_rate: int = 16000
185
+ ) -> dict:
186
+ """
187
+ 批量转换目录中的音频文件
188
+
189
+ Args:
190
+ input_dir: 输入目录
191
+ output_dir: 输出目录(可选,默认在输入目录下创建 voice/ 子目录)
192
+ format: 输出格式
193
+ sample_rate: 采样率
194
+
195
+ Returns:
196
+ 转换结果字典 {原文件名: ConversionResult}
197
+ """
198
+ input_dir = Path(input_dir)
199
+ if output_dir is None:
200
+ output_dir = input_dir / "voice"
201
+ else:
202
+ output_dir = Path(output_dir)
203
+
204
+ results = {}
205
+
206
+ # 支持的输入格式
207
+ input_extensions = [".mp3", ".wav", ".m4a", ".flac", ".ogg"]
208
+
209
+ for input_file in input_dir.glob("*"):
210
+ if input_file.suffix.lower() not in input_extensions:
211
+ continue
212
+
213
+ output_file = output_dir / input_file.with_suffix(f".{format}").name
214
+ results[input_file.name] = self.convert_to_voice(
215
+ input_path=input_file,
216
+ output_path=output_file,
217
+ format=format,
218
+ sample_rate=sample_rate
219
+ )
220
+
221
+ return results
222
+
223
+
224
+ # 便捷函数
225
+ def convert_mp3_to_opus(
226
+ input_path: Path,
227
+ output_path: Optional[Path] = None
228
+ ) -> ConversionResult:
229
+ """
230
+ 将 MP3 转换为 Opus 格式(飞书推荐)
231
+
232
+ Args:
233
+ input_path: MP3 文件路径
234
+ output_path: 输出路径(可选)
235
+
236
+ Returns:
237
+ ConversionResult
238
+ """
239
+ converter = AudioConverter()
240
+ return converter.convert_to_voice(
241
+ input_path=input_path,
242
+ output_path=output_path,
243
+ format="opus",
244
+ sample_rate=16000
245
+ )
package/scripts/cli.py CHANGED
@@ -20,7 +20,8 @@ def main():
20
20
  help='Data directory path (default: ~/.openclaw/state/eng-lang-tutor or OPENCLAW_STATE_DIR env)')
21
21
  parser.add_argument('command', nargs='?',
22
22
  choices=['show', 'backup', 'save_daily', 'record_view',
23
- 'stats', 'config', 'errors', 'schedule'],
23
+ 'stats', 'config', 'errors', 'schedule',
24
+ 'generate_audio'],
24
25
  help='Command to execute')
25
26
  parser.add_argument('--content-type', help='Content type for save_daily (keypoint, quiz)')
26
27
  parser.add_argument('--content', help='JSON content for save_daily')
@@ -198,6 +199,25 @@ def main():
198
199
  sm.save_state(state)
199
200
  print(f"Schedule updated: keypoint at {new_keypoint}, quiz at {new_quiz}")
200
201
 
202
+ elif args.command == 'generate_audio':
203
+ """Generate audio for a keypoint."""
204
+ target_date = None
205
+ if args.date:
206
+ try:
207
+ target_date = datetime.strptime(args.date, '%Y-%m-%d').date()
208
+ except ValueError:
209
+ print("Error: Invalid date format. Use YYYY-MM-DD")
210
+ exit(1)
211
+
212
+ result = sm.generate_keypoint_audio(target_date)
213
+
214
+ if result.get('success'):
215
+ print(f"Audio generated: {result.get('audio_path')}")
216
+ print(f"Duration: {result.get('duration_seconds', 0):.1f} seconds")
217
+ else:
218
+ print(f"Failed to generate audio: {result.get('error_message')}")
219
+ exit(1)
220
+
201
221
 
202
222
  if __name__ == "__main__":
203
223
  main()
@@ -0,0 +1,421 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ 飞书语音消息发送模块
4
+
5
+ 将 TTS 生成的音频转换为飞书语音格式并发送。
6
+
7
+ 使用示例:
8
+ from scripts.feishu_voice import FeishuVoiceSender
9
+
10
+ sender = FeishuVoiceSender(app_id="xxx", app_secret="xxx")
11
+
12
+ # 发送单条语音
13
+ await sender.send_voice(
14
+ receive_id="ou_xxx",
15
+ text="Hello, nice to meet you!"
16
+ )
17
+
18
+ # 发送知识点音频
19
+ await sender.send_keypoint_voices(
20
+ receive_id="ou_xxx",
21
+ keypoint=keypoint,
22
+ audio_info=audio_info
23
+ )
24
+ """
25
+
26
+ import os
27
+ import asyncio
28
+ import aiohttp
29
+ from pathlib import Path
30
+ from typing import Optional, Dict, Any, List
31
+ from dataclasses import dataclass
32
+
33
+ from .audio_converter import AudioConverter, ConversionResult
34
+
35
+
36
+ @dataclass
37
+ class VoiceSendResult:
38
+ """语音发送结果"""
39
+ success: bool
40
+ message_id: Optional[str] = None
41
+ error_message: Optional[str] = None
42
+
43
+
44
+ class FeishuVoiceSender:
45
+ """
46
+ 飞书语音消息发送器
47
+
48
+ 工作流程:
49
+ 1. TTS 生成 MP3 音频
50
+ 2. 转换为 Opus 格式(飞书推荐)
51
+ 3. 上传到飞书素材库
52
+ 4. 发送语音消息
53
+ """
54
+
55
+ FEISHU_API_BASE = "https://open.feishu.cn/open-apis"
56
+
57
+ def __init__(
58
+ self,
59
+ app_id: Optional[str] = None,
60
+ app_secret: Optional[str] = None,
61
+ tenant_key: Optional[str] = None,
62
+ audio_dir: Optional[Path] = None
63
+ ):
64
+ """
65
+ 初始化飞书语音发送器
66
+
67
+ Args:
68
+ app_id: 飞书应用 ID(可从环境变量 FEISHU_APP_ID 读取)
69
+ app_secret: 飞书应用密钥(可从环境变量 FEISHU_APP_SECRET 读取)
70
+ tenant_key: 租户密钥(自建应用无需)
71
+ audio_dir: 音频缓存目录
72
+ """
73
+ self.app_id = app_id or os.getenv("FEISHU_APP_ID")
74
+ self.app_secret = app_secret or os.getenv("FEISHU_APP_SECRET")
75
+ self.tenant_key = tenant_key
76
+
77
+ if not self.app_id or not self.app_secret:
78
+ raise ValueError(
79
+ "Missing Feishu credentials. Set FEISHU_APP_ID and FEISHU_APP_SECRET "
80
+ "environment variables or pass them to constructor."
81
+ )
82
+
83
+ self.audio_dir = audio_dir or Path(
84
+ os.getenv("OPENCLAW_STATE_DIR", "~/.openclaw/state/eng-lang-tutor")
85
+ ).expanduser() / "audio"
86
+ self.audio_dir.mkdir(parents=True, exist_ok=True)
87
+
88
+ self.converter = AudioConverter()
89
+ self._access_token: Optional[str] = None
90
+ self._token_expires: float = 0
91
+
92
+ async def _get_access_token(self) -> str:
93
+ """获取飞书访问令牌"""
94
+ import time
95
+
96
+ # 检查缓存
97
+ if self._access_token and time.time() < self._token_expires:
98
+ return self._access_token
99
+
100
+ url = f"{self.FEISHU_API_BASE}/auth/v3/tenant_access_token/internal"
101
+ headers = {"Content-Type": "application/json"}
102
+ data = {
103
+ "app_id": self.app_id,
104
+ "app_secret": self.app_secret
105
+ }
106
+
107
+ async with aiohttp.ClientSession() as session:
108
+ async with session.post(url, headers=headers, json=data) as resp:
109
+ result = await resp.json()
110
+
111
+ if result.get("code") != 0:
112
+ raise RuntimeError(f"Failed to get access token: {result}")
113
+
114
+ self._access_token = result["tenant_access_token"]
115
+ self._token_expires = time.time() + result.get("expire", 7200) - 300
116
+
117
+ return self._access_token
118
+
119
+ async def _upload_file(self, file_path: Path, file_type: str = "opus") -> str:
120
+ """
121
+ 上传文件到飞书素材库
122
+
123
+ Args:
124
+ file_path: 文件路径
125
+ file_type: 文件类型
126
+
127
+ Returns:
128
+ file_key
129
+ """
130
+ token = await self._get_access_token()
131
+
132
+ url = f"{self.FEISHU_API_BASE}/im/v1/files"
133
+ headers = {"Authorization": f"Bearer {token}"}
134
+
135
+ async with aiohttp.ClientSession() as session:
136
+ with open(file_path, "rb") as f:
137
+ form = aiohttp.FormData()
138
+ form.add_field("file_type", file_type)
139
+ form.add_field("file_name", file_path.name)
140
+ form.add_field("file", f, filename=file_path.name)
141
+
142
+ async with session.post(url, headers=headers, data=form) as resp:
143
+ result = await resp.json()
144
+
145
+ if result.get("code") != 0:
146
+ raise RuntimeError(f"Failed to upload file: {result}")
147
+
148
+ return result["data"]["file_key"]
149
+
150
+ async def _send_file_message(
151
+ self,
152
+ receive_id: str,
153
+ file_key: str,
154
+ receive_id_type: str = "open_id"
155
+ ) -> str:
156
+ """
157
+ 发送文件消息
158
+
159
+ Args:
160
+ receive_id: 接收者 ID
161
+ file_key: 文件 key
162
+ receive_id_type: 接收者 ID 类型
163
+
164
+ Returns:
165
+ message_id
166
+ """
167
+ token = await self._get_access_token()
168
+
169
+ url = f"{self.FEISHU_API_BASE}/im/v1/messages"
170
+ headers = {
171
+ "Authorization": f"Bearer {token}",
172
+ "Content-Type": "application/json"
173
+ }
174
+ params = {
175
+ "receive_id_type": receive_id_type
176
+ }
177
+ data = {
178
+ "receive_id": receive_id,
179
+ "msg_type": "file",
180
+ "content": f'{{"file_key": "{file_key}"}}'
181
+ }
182
+
183
+ async with aiohttp.ClientSession() as session:
184
+ async with session.post(url, headers=headers, params=params, json=data) as resp:
185
+ result = await resp.json()
186
+
187
+ if result.get("code") != 0:
188
+ raise RuntimeError(f"Failed to send message: {result}")
189
+
190
+ return result["data"]["message_id"]
191
+
192
+ async def send_voice(
193
+ self,
194
+ receive_id: str,
195
+ audio_path: Path,
196
+ receive_id_type: str = "open_id",
197
+ auto_convert: bool = True,
198
+ delete_after_send: bool = False
199
+ ) -> VoiceSendResult:
200
+ """
201
+ 发送语音消息
202
+
203
+ Args:
204
+ receive_id: 接收者 ID(open_id / user_id / union_id)
205
+ audio_path: 音频文件路径(MP3 或 Opus)
206
+ receive_id_type: 接收者 ID 类型
207
+ auto_convert: 是否自动转换为 Opus 格式
208
+ delete_after_send: 发送后是否删除临时文件
209
+
210
+ Returns:
211
+ VoiceSendResult
212
+ """
213
+ audio_path = Path(audio_path)
214
+ temp_file = None
215
+
216
+ try:
217
+ # 如果是 MP3,转换为 Opus
218
+ if auto_convert and audio_path.suffix.lower() == ".mp3":
219
+ opus_path = audio_path.with_suffix(".opus")
220
+ result = self.converter.convert_to_voice(
221
+ input_path=audio_path,
222
+ output_path=opus_path,
223
+ format="opus",
224
+ sample_rate=16000
225
+ )
226
+ if not result.success:
227
+ return VoiceSendResult(
228
+ success=False,
229
+ error_message=f"Audio conversion failed: {result.error_message}"
230
+ )
231
+ audio_path = opus_path
232
+ temp_file = opus_path if delete_after_send else None
233
+
234
+ # 上传文件
235
+ file_key = await self._upload_file(audio_path)
236
+
237
+ # 发送消息
238
+ message_id = await self._send_file_message(
239
+ receive_id=receive_id,
240
+ file_key=file_key,
241
+ receive_id_type=receive_id_type
242
+ )
243
+
244
+ return VoiceSendResult(
245
+ success=True,
246
+ message_id=message_id
247
+ )
248
+
249
+ except Exception as e:
250
+ return VoiceSendResult(
251
+ success=False,
252
+ error_message=str(e)
253
+ )
254
+ finally:
255
+ # 清理临时文件
256
+ if temp_file and temp_file.exists():
257
+ temp_file.unlink()
258
+
259
+ async def send_voice_from_text(
260
+ self,
261
+ receive_id: str,
262
+ text: str,
263
+ voice: str = "catherine",
264
+ speed: float = 0.9,
265
+ receive_id_type: str = "open_id"
266
+ ) -> VoiceSendResult:
267
+ """
268
+ 从文本直接生成并发送语音
269
+
270
+ Args:
271
+ receive_id: 接收者 ID
272
+ text: 要转换的文本
273
+ voice: 音色
274
+ speed: 语速
275
+ receive_id_type: 接收者 ID 类型
276
+
277
+ Returns:
278
+ VoiceSendResult
279
+ """
280
+ from .tts import TTSManager
281
+
282
+ try:
283
+ # 生成 TTS 音频
284
+ manager = TTSManager.from_env()
285
+ output_path = self.audio_dir / f"temp_{hash(text)}.mp3"
286
+
287
+ result = manager.synthesize(
288
+ text=text,
289
+ output_path=output_path,
290
+ voice=voice,
291
+ speed=speed
292
+ )
293
+
294
+ if not result.success:
295
+ return VoiceSendResult(
296
+ success=False,
297
+ error_message=f"TTS failed: {result.error_message}"
298
+ )
299
+
300
+ # 发送语音
301
+ return await self.send_voice(
302
+ receive_id=receive_id,
303
+ audio_path=output_path,
304
+ receive_id_type=receive_id_type,
305
+ auto_convert=True,
306
+ delete_after_send=True
307
+ )
308
+
309
+ except Exception as e:
310
+ return VoiceSendResult(
311
+ success=False,
312
+ error_message=str(e)
313
+ )
314
+
315
+ async def send_keypoint_voices(
316
+ self,
317
+ receive_id: str,
318
+ keypoint: Dict[str, Any],
319
+ audio_info: Dict[str, Any],
320
+ receive_id_type: str = "open_id",
321
+ include_dialogue: bool = True,
322
+ include_expressions: bool = True
323
+ ) -> List[VoiceSendResult]:
324
+ """
325
+ 发送知识点的所有语音
326
+
327
+ Args:
328
+ receive_id: 接收者 ID
329
+ keypoint: 知识点数据
330
+ audio_info: TTS 生成的音频信息
331
+ receive_id_type: 接收者 ID 类型
332
+ include_dialogue: 是否发送对话音频
333
+ include_expressions: 是否发送表达音频
334
+
335
+ Returns:
336
+ 发送结果列表
337
+ """
338
+ results = []
339
+ date_str = audio_info.get("generated_at", "")[:10] # YYYY-MM-DD
340
+
341
+ # 发送对话音频
342
+ if include_dialogue:
343
+ for item in audio_info.get("dialogue", []):
344
+ if "error" in item:
345
+ results.append(VoiceSendResult(
346
+ success=False,
347
+ error_message=f"Dialogue generation failed: {item['error']}"
348
+ ))
349
+ continue
350
+
351
+ audio_url = item.get("audio_url", "")
352
+ if not audio_url:
353
+ continue
354
+
355
+ # 解析路径:audio/YYYY-MM-DD/filename.opus
356
+ parts = audio_url.split("/")
357
+ audio_path = self.audio_dir / parts[1] / parts[2]
358
+
359
+ if not audio_path.exists():
360
+ # 尝试 MP3 扩展名
361
+ audio_path = audio_path.with_suffix(".mp3")
362
+
363
+ if audio_path.exists():
364
+ speaker = item.get("speaker", "")
365
+ text = item.get("text", "")
366
+
367
+ result = await self.send_voice(
368
+ receive_id=receive_id,
369
+ audio_path=audio_path,
370
+ receive_id_type=receive_id_type,
371
+ auto_convert=True
372
+ )
373
+ results.append(result)
374
+
375
+ # 发送表达音频
376
+ if include_expressions:
377
+ for item in audio_info.get("expressions", []):
378
+ if "error" in item:
379
+ results.append(VoiceSendResult(
380
+ success=False,
381
+ error_message=f"Expression generation failed: {item['error']}"
382
+ ))
383
+ continue
384
+
385
+ audio_url = item.get("audio_url", "")
386
+ if not audio_url:
387
+ continue
388
+
389
+ parts = audio_url.split("/")
390
+ audio_path = self.audio_dir / parts[1] / parts[2]
391
+
392
+ if not audio_path.exists():
393
+ audio_path = audio_path.with_suffix(".mp3")
394
+
395
+ if audio_path.exists():
396
+ result = await self.send_voice(
397
+ receive_id=receive_id,
398
+ audio_path=audio_path,
399
+ receive_id_type=receive_id_type,
400
+ auto_convert=True
401
+ )
402
+ results.append(result)
403
+
404
+ return results
405
+
406
+
407
+ # 同步包装器(用于非异步环境)
408
+ class FeishuVoiceSenderSync:
409
+ """飞书语音发送器的同步包装"""
410
+
411
+ def __init__(self, *args, **kwargs):
412
+ self._async_sender = FeishuVoiceSender(*args, **kwargs)
413
+
414
+ def send_voice(self, *args, **kwargs) -> VoiceSendResult:
415
+ return asyncio.run(self._async_sender.send_voice(*args, **kwargs))
416
+
417
+ def send_voice_from_text(self, *args, **kwargs) -> VoiceSendResult:
418
+ return asyncio.run(self._async_sender.send_voice_from_text(*args, **kwargs))
419
+
420
+ def send_keypoint_voices(self, *args, **kwargs) -> List[VoiceSendResult]:
421
+ return asyncio.run(self._async_sender.send_keypoint_voices(*args, **kwargs))
@@ -287,7 +287,8 @@ class StateManager:
287
287
  return daily_path
288
288
 
289
289
  def save_daily_content(self, content_type: str, content: Dict[str, Any],
290
- target_date: Optional[date] = None) -> Path:
290
+ target_date: Optional[date] = None,
291
+ generate_audio: bool = True) -> Path:
291
292
  """
292
293
  Save content to the daily directory.
293
294
 
@@ -295,6 +296,7 @@ class StateManager:
295
296
  content_type: Type of content ('keypoint', 'quiz', 'user_answers')
296
297
  content: Content dictionary to save
297
298
  target_date: Date for the content (defaults to today)
299
+ generate_audio: Whether to auto-generate audio for keypoints (default True)
298
300
 
299
301
  Returns:
300
302
  Path to the saved file
@@ -310,8 +312,95 @@ class StateManager:
310
312
  with open(file_path, 'w', encoding='utf-8') as f:
311
313
  json.dump(content, f, ensure_ascii=False, indent=2)
312
314
 
315
+ # Auto-generate audio for keypoints
316
+ if content_type == 'keypoint' and generate_audio:
317
+ try:
318
+ audio_result = self.generate_keypoint_audio(target_date)
319
+ if audio_result.get('success'):
320
+ # Update keypoint with audio metadata
321
+ content['audio'] = {
322
+ 'composed': audio_result.get('audio_path'),
323
+ 'duration_seconds': audio_result.get('duration_seconds'),
324
+ 'generated_at': datetime.now().isoformat()
325
+ }
326
+ # Re-save with audio info
327
+ with open(file_path, 'w', encoding='utf-8') as f:
328
+ json.dump(content, f, ensure_ascii=False, indent=2)
329
+ except Exception as e:
330
+ print(f"Warning: Audio generation failed: {e}")
331
+
313
332
  return file_path
314
333
 
334
+ def generate_keypoint_audio(self, target_date: Optional[date] = None) -> Dict[str, Any]:
335
+ """
336
+ Generate composed audio for a keypoint.
337
+
338
+ Args:
339
+ target_date: Date for the keypoint (defaults to today)
340
+
341
+ Returns:
342
+ Dictionary with:
343
+ - success: bool
344
+ - audio_path: str (relative path from data_dir)
345
+ - duration_seconds: float
346
+ - error_message: str (if failed)
347
+ """
348
+ try:
349
+ from .audio_composer import AudioComposer
350
+ from .tts import TTSManager
351
+ except ImportError:
352
+ from audio_composer import AudioComposer
353
+ from tts import TTSManager
354
+
355
+ if target_date is None:
356
+ target_date = date.today()
357
+
358
+ # Load the keypoint
359
+ keypoint = self.load_daily_content('keypoint', target_date)
360
+ if not keypoint:
361
+ return {
362
+ 'success': False,
363
+ 'error_message': f'No keypoint found for {target_date}'
364
+ }
365
+
366
+ # Prepare output path
367
+ date_str = target_date.strftime('%Y-%m-%d')
368
+ self.audio_dir.mkdir(parents=True, exist_ok=True)
369
+ output_path = self.audio_dir / date_str / "keypoint_full.mp3"
370
+ output_path.parent.mkdir(parents=True, exist_ok=True)
371
+
372
+ try:
373
+ # Initialize TTS and composer (handle both package and direct imports)
374
+ try:
375
+ from .audio_composer import AudioComposer
376
+ from .tts import TTSManager
377
+ except ImportError:
378
+ from audio_composer import AudioComposer
379
+ from tts import TTSManager
380
+
381
+ tts = TTSManager.from_env()
382
+ composer = AudioComposer(tts)
383
+
384
+ # Compose audio
385
+ result = composer.compose_keypoint_audio(keypoint, output_path)
386
+
387
+ if result.success:
388
+ return {
389
+ 'success': True,
390
+ 'audio_path': f"audio/{date_str}/keypoint_full.mp3",
391
+ 'duration_seconds': result.duration_seconds
392
+ }
393
+ else:
394
+ return {
395
+ 'success': False,
396
+ 'error_message': result.error_message
397
+ }
398
+ except Exception as e:
399
+ return {
400
+ 'success': False,
401
+ 'error_message': str(e)
402
+ }
403
+
315
404
  def load_daily_content(self, content_type: str,
316
405
  target_date: Optional[date] = None) -> Optional[Dict[str, Any]]:
317
406
  """
@@ -27,9 +27,14 @@ from pathlib import Path
27
27
  from typing import Dict, Any, Optional, Type, ClassVar
28
28
  from datetime import date, datetime
29
29
  import os
30
+ import sys
31
+
32
+ # 添加 scripts 目录到路径以导入 state_manager
33
+ sys.path.insert(0, str(Path(__file__).parent.parent))
30
34
 
31
35
  from .base import TTSProvider, TTSConfig, TTSResult
32
36
  from .providers.xunfei import XunFeiProvider
37
+ from state_manager import get_default_state_dir
33
38
 
34
39
 
35
40
  # Provider 注册表
@@ -57,7 +62,7 @@ class TTSManager:
57
62
  def __init__(
58
63
  self,
59
64
  provider: str = "xunfei",
60
- data_dir: str = "data",
65
+ data_dir: str = None,
61
66
  config: Optional[TTSConfig] = None,
62
67
  **credentials
63
68
  ):
@@ -66,12 +71,12 @@ class TTSManager:
66
71
 
67
72
  Args:
68
73
  provider: Provider 名称(目前仅支持 "xunfei")
69
- data_dir: 数据目录
74
+ data_dir: 数据目录(默认使用 OPENCLAW_STATE_DIR 或 ~/.openclaw/state/eng-lang-tutor/)
70
75
  config: TTS 配置
71
76
  **credentials: Provider 认证信息
72
77
 
73
78
  示例:
74
- # 讯飞(从环境变量读取)
79
+ # 讯飞(使用默认数据目录)
75
80
  manager = TTSManager(provider="xunfei")
76
81
 
77
82
  # 讯飞(直接传入密钥)
@@ -88,7 +93,12 @@ class TTSManager:
88
93
  f"Available: {list(PROVIDERS.keys())}"
89
94
  )
90
95
 
91
- self.data_dir = Path(data_dir)
96
+ # 使用与 StateManager 相同的默认目录逻辑
97
+ if data_dir is None:
98
+ self.data_dir = get_default_state_dir()
99
+ else:
100
+ self.data_dir = Path(data_dir)
101
+
92
102
  self.audio_dir = self.data_dir / "audio"
93
103
  self.audio_dir.mkdir(parents=True, exist_ok=True)
94
104
 
@@ -16,6 +16,8 @@ import base64
16
16
  import hmac
17
17
  import json
18
18
  import os
19
+ import ssl
20
+ import certifi
19
21
  from pathlib import Path
20
22
  from typing import Optional, ClassVar, Dict
21
23
  from urllib.parse import urlencode
@@ -132,8 +134,12 @@ class XunFeiProvider(TTSProvider):
132
134
  data = json.loads(message)
133
135
  if data.get("code") == 0:
134
136
  audio = data.get("data", {}).get("audio", "")
137
+ status = data.get("data", {}).get("status", 0)
135
138
  if audio:
136
139
  audio_data.extend(base64.b64decode(audio))
140
+ # status=2 表示合成完成,关闭连接
141
+ if status == 2:
142
+ ws.close()
137
143
  else:
138
144
  error_msg = f"XunFei API error: code={data.get('code')}, message={data.get('message')}"
139
145
  except json.JSONDecodeError as e:
@@ -170,7 +176,10 @@ class XunFeiProvider(TTSProvider):
170
176
  on_error=on_error,
171
177
  )
172
178
  ws.on_open = on_open
173
- ws.run_forever()
179
+ # 使用 certifi 提供的 SSL 证书
180
+ ws.run_forever(
181
+ sslopt={"cert_reqs": ssl.CERT_REQUIRED, "ca_certs": certifi.where()}
182
+ )
174
183
 
175
184
  if error_msg:
176
185
  return TTSResult(success=False, error_message=error_msg)