smartpi 1.1.4__py3-none-any.whl → 1.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125):
  1. smartpi/__init__.py +8 -0
  2. smartpi/__init__.pyc +0 -0
  3. smartpi/_gui.py +66 -0
  4. smartpi/_gui.pyc +0 -0
  5. smartpi/ai_asr.py +1037 -0
  6. smartpi/ai_asr.pyc +0 -0
  7. smartpi/ai_llm.py +934 -0
  8. smartpi/ai_llm.pyc +0 -0
  9. smartpi/ai_tts.py +938 -0
  10. smartpi/ai_tts.pyc +0 -0
  11. smartpi/ai_vad.py +83 -0
  12. smartpi/ai_vad.pyc +0 -0
  13. smartpi/audio.py +125 -0
  14. smartpi/audio.pyc +0 -0
  15. smartpi/base_driver.py +618 -0
  16. smartpi/base_driver.pyc +0 -0
  17. smartpi/camera.py +84 -0
  18. smartpi/camera.pyc +0 -0
  19. smartpi/color_sensor.py +18 -0
  20. smartpi/color_sensor.pyc +0 -0
  21. smartpi/cw2015.py +179 -0
  22. smartpi/cw2015.pyc +0 -0
  23. smartpi/flash.py +130 -0
  24. smartpi/flash.pyc +0 -0
  25. smartpi/humidity.py +20 -0
  26. smartpi/humidity.pyc +0 -0
  27. smartpi/led.py +19 -0
  28. smartpi/led.pyc +0 -0
  29. smartpi/light_sensor.py +72 -0
  30. smartpi/light_sensor.pyc +0 -0
  31. smartpi/local_model.py +432 -0
  32. smartpi/local_model.pyc +0 -0
  33. smartpi/mcp_client.py +100 -0
  34. smartpi/mcp_client.pyc +0 -0
  35. smartpi/mcp_fastmcp.py +322 -0
  36. smartpi/mcp_fastmcp.pyc +0 -0
  37. smartpi/mcp_intent_recognizer.py +408 -0
  38. smartpi/mcp_intent_recognizer.pyc +0 -0
  39. smartpi/models/__init__.py +0 -0
  40. smartpi/models/__init__.pyc +0 -0
  41. smartpi/models/snakers4_silero-vad/__init__.py +0 -0
  42. smartpi/models/snakers4_silero-vad/__init__.pyc +0 -0
  43. smartpi/models/snakers4_silero-vad/hubconf.py +56 -0
  44. smartpi/models/snakers4_silero-vad/hubconf.pyc +0 -0
  45. smartpi/motor.py +177 -0
  46. smartpi/motor.pyc +0 -0
  47. smartpi/move.py +218 -0
  48. smartpi/move.pyc +0 -0
  49. smartpi/onnx_hand_workflow.py +201 -0
  50. smartpi/onnx_hand_workflow.pyc +0 -0
  51. smartpi/onnx_image_workflow.py +176 -0
  52. smartpi/onnx_image_workflow.pyc +0 -0
  53. smartpi/onnx_pose_workflow.py +482 -0
  54. smartpi/onnx_pose_workflow.pyc +0 -0
  55. smartpi/onnx_text_workflow.py +173 -0
  56. smartpi/onnx_text_workflow.pyc +0 -0
  57. smartpi/onnx_voice_workflow.py +437 -0
  58. smartpi/onnx_voice_workflow.pyc +0 -0
  59. smartpi/posemodel/__init__.py +0 -0
  60. smartpi/posemodel/__init__.pyc +0 -0
  61. smartpi/posenet_utils.py +222 -0
  62. smartpi/posenet_utils.pyc +0 -0
  63. smartpi/rknn_hand_workflow.py +245 -0
  64. smartpi/rknn_hand_workflow.pyc +0 -0
  65. smartpi/rknn_image_workflow.py +405 -0
  66. smartpi/rknn_image_workflow.pyc +0 -0
  67. smartpi/rknn_pose_workflow.py +592 -0
  68. smartpi/rknn_pose_workflow.pyc +0 -0
  69. smartpi/rknn_text_workflow.py +240 -0
  70. smartpi/rknn_text_workflow.pyc +0 -0
  71. smartpi/rknn_voice_workflow.py +394 -0
  72. smartpi/rknn_voice_workflow.pyc +0 -0
  73. smartpi/servo.py +178 -0
  74. smartpi/servo.pyc +0 -0
  75. smartpi/temperature.py +18 -0
  76. smartpi/temperature.pyc +0 -0
  77. smartpi/tencentcloud-speech-sdk-python/__init__.py +1 -0
  78. smartpi/tencentcloud-speech-sdk-python/__init__.pyc +0 -0
  79. smartpi/tencentcloud-speech-sdk-python/asr/__init__.py +0 -0
  80. smartpi/tencentcloud-speech-sdk-python/asr/__init__.pyc +0 -0
  81. smartpi/tencentcloud-speech-sdk-python/asr/flash_recognizer.py +178 -0
  82. smartpi/tencentcloud-speech-sdk-python/asr/flash_recognizer.pyc +0 -0
  83. smartpi/tencentcloud-speech-sdk-python/asr/speech_recognizer.py +311 -0
  84. smartpi/tencentcloud-speech-sdk-python/asr/speech_recognizer.pyc +0 -0
  85. smartpi/tencentcloud-speech-sdk-python/common/__init__.py +1 -0
  86. smartpi/tencentcloud-speech-sdk-python/common/__init__.pyc +0 -0
  87. smartpi/tencentcloud-speech-sdk-python/common/credential.py +6 -0
  88. smartpi/tencentcloud-speech-sdk-python/common/credential.pyc +0 -0
  89. smartpi/tencentcloud-speech-sdk-python/common/log.py +16 -0
  90. smartpi/tencentcloud-speech-sdk-python/common/log.pyc +0 -0
  91. smartpi/tencentcloud-speech-sdk-python/common/utils.py +7 -0
  92. smartpi/tencentcloud-speech-sdk-python/common/utils.pyc +0 -0
  93. smartpi/tencentcloud-speech-sdk-python/soe/__init__.py +0 -0
  94. smartpi/tencentcloud-speech-sdk-python/soe/__init__.pyc +0 -0
  95. smartpi/tencentcloud-speech-sdk-python/soe/speaking_assessment.py +276 -0
  96. smartpi/tencentcloud-speech-sdk-python/soe/speaking_assessment.pyc +0 -0
  97. smartpi/tencentcloud-speech-sdk-python/tts/__init__.py +0 -0
  98. smartpi/tencentcloud-speech-sdk-python/tts/__init__.pyc +0 -0
  99. smartpi/tencentcloud-speech-sdk-python/tts/flowing_speech_synthesizer.py +294 -0
  100. smartpi/tencentcloud-speech-sdk-python/tts/flowing_speech_synthesizer.pyc +0 -0
  101. smartpi/tencentcloud-speech-sdk-python/tts/speech_synthesizer.py +144 -0
  102. smartpi/tencentcloud-speech-sdk-python/tts/speech_synthesizer.pyc +0 -0
  103. smartpi/tencentcloud-speech-sdk-python/tts/speech_synthesizer_ws.py +234 -0
  104. smartpi/tencentcloud-speech-sdk-python/tts/speech_synthesizer_ws.pyc +0 -0
  105. smartpi/tencentcloud-speech-sdk-python/vc/__init__.py +0 -0
  106. smartpi/tencentcloud-speech-sdk-python/vc/__init__.pyc +0 -0
  107. smartpi/tencentcloud-speech-sdk-python/vc/speech_convertor_ws.py +237 -0
  108. smartpi/tencentcloud-speech-sdk-python/vc/speech_convertor_ws.pyc +0 -0
  109. smartpi/text_gte_model/__init__.py +0 -0
  110. smartpi/text_gte_model/__init__.pyc +0 -0
  111. smartpi/text_gte_model/config/__init__.py +0 -0
  112. smartpi/text_gte_model/config/__init__.pyc +0 -0
  113. smartpi/text_gte_model/gte/__init__.py +0 -0
  114. smartpi/text_gte_model/gte/__init__.pyc +0 -0
  115. smartpi/touch_sensor.py +16 -0
  116. smartpi/touch_sensor.pyc +0 -0
  117. smartpi/trace.py +120 -0
  118. smartpi/trace.pyc +0 -0
  119. smartpi/ultrasonic.py +20 -0
  120. smartpi/ultrasonic.pyc +0 -0
  121. {smartpi-1.1.4.dist-info → smartpi-1.1.5.dist-info}/METADATA +3 -2
  122. smartpi-1.1.5.dist-info/RECORD +137 -0
  123. smartpi-1.1.4.dist-info/RECORD +0 -77
  124. {smartpi-1.1.4.dist-info → smartpi-1.1.5.dist-info}/WHEEL +0 -0
  125. {smartpi-1.1.4.dist-info → smartpi-1.1.5.dist-info}/top_level.txt +0 -0
smartpi/ai_tts.py ADDED
@@ -0,0 +1,938 @@
1
+ # -*- coding: utf-8 -*-
2
+ from datetime import datetime
3
+ import time
4
+ import asyncio
5
+ import threading
6
+ import pyaudio
7
+ import os
8
+ import sys
9
+ import numpy as np
10
+
11
# Absolute path of the directory that contains this file
current_dir = os.path.dirname(os.path.abspath(__file__))
# Add the bundled SDK directory (as an absolute path) to the import search path
sys.path.append(os.path.join(current_dir, "tencentcloud-speech-sdk-python"))
15
+
16
+ from common import credential
17
+ from tts import flowing_speech_synthesizer
18
+
19
# Debug log switch
DEBUG_MODE = True  # set to True to enable verbose debug logging

# Module-wide AI speech-synthesis instance
_ai_tts_instance = None

# Audio parameters
AUDIO_FORMAT = pyaudio.paInt16  # sample format: 16-bit PCM
CHANNELS = 1  # mono
RATE = 16000  # sample rate: 16 kHz

# Module-wide running flag
is_running = True
32
+
33
+
34
class TTSStreamListener(flowing_speech_synthesizer.FlowingSpeechSynthesisListener):
    """Callback listener for the streaming TTS synthesizer.

    Forwards synthesized audio to the owning engine's audio queue and keeps
    per-text-chunk byte counts and subtitle entries, which are handed to the
    engine when synthesis ends.
    """

    def __init__(self, tts_engine, loop):
        """Bind the listener to an engine and an event loop.

        Args:
            tts_engine: Engine that receives audio, chunk durations and
                subtitle data, and is told when synthesis has finished.
            loop: asyncio event loop on which queue ``put`` calls are scheduled.
        """
        super().__init__()
        self.tts_engine = tts_engine
        self.loop = loop
        self.session_id = ""
        self.total_audio_bytes = 0
        self.bit_depth = 16  # bits per sample used to convert bytes to seconds

        # Byte bookkeeping used to derive the duration of each text chunk.
        self.current_chunk_bytes = 0  # audio bytes received for the current chunk
        self.text_chunk_bytes = []  # audio byte count of every finished chunk
        self.subtitle_data = []  # subtitle entries collected so far

    @staticmethod
    def _log_debug(message):
        """Print a timestamped debug line when DEBUG_MODE is enabled."""
        if DEBUG_MODE:
            print(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}|{message}")

    def on_synthesis_start(self, session_id):
        """Reset all per-session counters when a synthesis session starts.

        Args:
            session_id: Identifier of the new session.
        """
        super().on_synthesis_start(session_id)
        self.session_id = session_id
        self.total_audio_bytes = 0
        self.current_chunk_bytes = 0
        self.text_chunk_bytes = []
        self.subtitle_data = []
        self._log_debug(f"TTS合成开始|session_id: {session_id}")

    def on_synthesis_end(self):
        """Publish chunk durations and subtitles to the engine, then notify it."""
        super().on_synthesis_end()

        # Flush the byte count of the last chunk, if any audio was received.
        if self.current_chunk_bytes > 0:
            self.text_chunk_bytes.append(self.current_chunk_bytes)
            self.current_chunk_bytes = 0

        bytes_per_second = (
            self.tts_engine.rate * self.tts_engine.channels * (self.bit_depth / 8)
        )
        chunk_durations = [
            round(byte_count / bytes_per_second, 2)
            for byte_count in self.text_chunk_bytes
        ]
        self.tts_engine.text_chunk_durations = chunk_durations
        self.tts_engine.subtitle_data = self.subtitle_data

        self._log_debug(f"TTS合成结束|各片段时长: {chunk_durations}秒")
        self.tts_engine.on_synthesis_complete()

    def on_audio_result(self, audio_bytes):
        """Account for a piece of audio and queue it for playback.

        Args:
            audio_bytes: Raw audio bytes; empty or ``None`` payloads are
                counted as zero bytes and not queued.
        """
        size = len(audio_bytes) if audio_bytes else 0
        self._log_debug(f"收到音频数据|长度: {size}字节")

        super().on_audio_result(audio_bytes)

        if not audio_bytes:
            return

        self.total_audio_bytes += size
        self.current_chunk_bytes += size

        queue = self.tts_engine.audio_queue
        if queue is not None:
            # This callback is not running on the event loop, so the put must
            # be handed over to the loop in a thread-safe way.
            asyncio.run_coroutine_threadsafe(queue.put(audio_bytes), self.loop)

    def on_synthesis_fail(self, response):
        """Log a synthesis failure and always release the waiting engine.

        Args:
            response: Error response; expected to be a dict carrying
                ``code`` and ``message``, but missing keys are tolerated.
        """
        try:
            super().on_synthesis_fail(response)
            payload = response if isinstance(response, dict) else {}
            err_code = payload.get("code")
            err_msg = payload.get("message")
            self._log_debug(f"TTS合成失败|错误码: {err_code}|错误信息: {err_msg}")
        finally:
            # The engine's playback loop only ends once synthesis is marked
            # complete, so this must run even if the response is malformed.
            self.tts_engine.on_synthesis_complete()

    def on_text_result(self, response):
        """Collect subtitle entries from a text result.

        Args:
            response: Response dict; subtitles are read from
                ``response['result']['subtitles']`` when present.
        """
        super().on_text_result(response)

        result = response.get('result') if isinstance(response, dict) else None
        subtitles = result.get('subtitles') if isinstance(result, dict) else None
        if not subtitles:
            return

        for subtitle in subtitles:
            self.subtitle_data.append({
                'text': subtitle.get('Text', ''),
                # Times arrive in milliseconds and are stored in seconds.
                'begin_time': (subtitle.get('BeginTime') or 0) / 1000,
                'end_time': (subtitle.get('EndTime') or 0) / 1000,
                'begin_index': subtitle.get('BeginIndex', 0),
                'end_index': subtitle.get('EndIndex', 0),
            })

        self._log_debug(f"收到字幕数据|包含 {len(subtitles)} 个字")
164
+
165
+
166
class TencentSpeechSynthesizer:
    """Streaming text-to-speech engine built on the Tencent Cloud flowing synthesizer.

    Text chunks are sent to the SDK synthesizer; the audio it returns is
    placed on an asyncio queue by ``TTSStreamListener`` and written to a
    PyAudio output stream by a background playback thread.
    """

    # WebSocket status codes of the SDK synthesizer, as named by the inline
    # notes of the original implementation.
    _WS_OPENED = 2
    _WS_TERMINAL = (3, 4, 5)  # FINAL, ERROR, CLOSED

    _VOICE_TYPE = 501000
    _CODEC = "pcm"
    # Output rates probed, in order, after the configured rate.
    _FALLBACK_RATES = (44100, 48000, 8000, 22050)

    def __init__(self, appid=None, secret_id=None, secret_key=None):
        """Store credentials and create the PyAudio instance.

        Args:
            appid (str, optional): Tencent Cloud APPID.
            secret_id (str, optional): Tencent Cloud SECRET_ID.
            secret_key (str, optional): Tencent Cloud SECRET_KEY.
        """
        self.state_lock = threading.Lock()
        self.appid = appid
        self.cred = credential.Credential(secret_id, secret_key) if secret_id and secret_key else None

        self.audio_format = AUDIO_FORMAT
        self.channels = CHANNELS
        self.rate = RATE

        self.listener = None
        self.synthesizer = None
        self.audio_queue = None
        self.p = None  # PyAudio instance
        self.playback_stream = None
        self.playback_thread = None
        self.loop = None
        self.is_ready = False
        self.loop_thread = None
        self._is_playing = False
        self.synthesis_complete = False
        self.audio_duration = 0.0
        self.total_duration = 0

        self.volume = 1.0  # playback gain, 1.0 means 100%

        # Mapping between text chunks and audio durations.
        self.text_chunk_durations = []  # duration of every text chunk, in seconds
        self.current_chunk_index = 0  # index of the chunk currently playing
        self.subtitle_data = []  # subtitle entries of the last synthesis

        self.on_play_start = None
        self.on_play_end = None

        self._init_pyaudio()

    @staticmethod
    def _log_debug(message):
        """Print a timestamped debug line when DEBUG_MODE is enabled."""
        if DEBUG_MODE:
            print(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}|{message}")

    def _init_pyaudio(self):
        """Create the PyAudio instance; leaves ``self.p`` as None on failure."""
        try:
            self.p = pyaudio.PyAudio()
            self._log_debug("PyAudio实例初始化成功")
        except Exception as e:
            self._log_debug(f"PyAudio实例初始化失败|错误: {str(e)}")
            self.p = None

    @property
    def is_playing(self):
        """bool: Whether the playback thread is currently active."""
        with self.state_lock:
            return self._is_playing

    @is_playing.setter
    def is_playing(self, value):
        """Set the playing flag under the state lock.

        Args:
            value (bool): New playing state.
        """
        with self.state_lock:
            self._is_playing = value

    def set_playback_callback(self, on_play_start=None, on_play_end=None):
        """Register the callbacks fired when playback starts and ends.

        Args:
            on_play_start (callable, optional): Called when playback starts.
            on_play_end (callable, optional): Called when playback ends.
        """
        self.on_play_start = on_play_start
        self.on_play_end = on_play_end

    def get_audio_duration(self):
        """Return the total audio duration in seconds.

        The value is the sum of the chunk durations known when synthesis
        last completed; it is 0.0 before that.

        Returns:
            float: Total duration in seconds.
        """
        with self.state_lock:
            return self.audio_duration

    def get_current_chunk_duration(self):
        """Return the duration of the chunk at the current index.

        Returns:
            float: Duration in seconds, or 0.0 when there is no such chunk.
        """
        with self.state_lock:
            if self.current_chunk_index < len(self.text_chunk_durations):
                return self.text_chunk_durations[self.current_chunk_index]
            return 0.0

    def mark_chunk_played(self):
        """Advance to the next chunk if one remains.

        Returns:
            bool: True when the index moved to a following chunk, False when
            the current chunk was already the last one.
        """
        with self.state_lock:
            if self.current_chunk_index < len(self.text_chunk_durations) - 1:
                self.current_chunk_index += 1
                return True
            return False

    def on_synthesis_complete(self):
        """Record the total duration and flag synthesis as complete."""
        durations = getattr(self, 'text_chunk_durations', None)
        total = sum(durations) if durations else 0
        self.total_duration = total
        # Keep get_audio_duration() in sync; it previously never changed.
        with self.state_lock:
            self.audio_duration = float(total)
        self.synthesis_complete = True

        if durations:
            self._log_debug(f"合成已完成|总时长: {self.total_duration:.2f}秒")
        else:
            self._log_debug("合成已完成|无法计算总时长")

    def _create_synthesizer(self):
        """Build a configured SDK synthesizer bound to the current listener."""
        synthesizer = flowing_speech_synthesizer.FlowingSpeechSynthesizer(
            self.appid, self.cred, self.listener)
        synthesizer.set_voice_type(self._VOICE_TYPE)
        synthesizer.set_codec(self._CODEC)
        synthesizer.set_sample_rate(self.rate)
        synthesizer.set_enable_subtitle(True)
        return synthesizer

    def init(self):
        """Create the event loop, queue, listener and synthesizer.

        Returns:
            bool: False when no PyAudio instance is available, True otherwise
            (a readiness timeout is logged but deliberately not treated as
            a failure).
        """
        if not self.p:
            self._init_pyaudio()
        if not self.p:
            self._log_debug("没有可用的PyAudio实例")
            return False

        self.loop = asyncio.new_event_loop()
        self.loop_thread = threading.current_thread()
        asyncio.set_event_loop(self.loop)

        self.audio_queue = asyncio.Queue()
        self.listener = TTSStreamListener(self, self.loop)

        self.synthesizer = self._create_synthesizer()
        self.synthesizer.start()

        # Wait up to 5 seconds for the synthesizer to report readiness.
        self.is_ready = self.synthesizer.wait_ready(5000)
        if not self.is_ready:
            self._log_debug("TTS合成器准备超时,但继续初始化")
            # Kept from the original design: carry on and let
            # send_text_chunk() re-check the connection before sending.
            self.is_ready = True
        return self.is_ready

    def set_volume(self, volume):
        """Set the playback gain, clamped to the range 0.0-1.0.

        Args:
            volume (float): Requested volume.

        Returns:
            tuple[bool, float]: ``(True, applied_volume)`` on success, or
            ``(False, None)`` when the value cannot be converted to float.
        """
        try:
            volume_level = max(0.0, min(1.0, float(volume)))
        except (TypeError, ValueError, OverflowError) as e:
            self._log_debug(f"设置音量失败|错误: {e}")
            return False, None

        self.volume = volume_level
        self._log_debug(f"TTS音量已设置|值: {volume_level * 100:.1f}%")
        return True, volume_level

    def _ws_is_open(self):
        """Return True when the synthesizer has a WebSocket in the OPENED state."""
        synthesizer = self.synthesizer
        return bool(getattr(synthesizer, 'ws', None)) and synthesizer.status == self._WS_OPENED

    def _wait_ws_open(self, timeout):
        """Poll every 0.1 s until the WebSocket is open or ``timeout`` seconds pass."""
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            if self._ws_is_open():
                return True
            time.sleep(0.1)
        return self._ws_is_open()

    def send_text_chunk(self, text, is_end=False):
        """Send one text chunk to the synthesizer, reconnecting if needed.

        On any failure the synthesis is marked complete so that a playback
        thread that is already running can drain its queue and stop instead
        of waiting forever.

        Args:
            text (str): Text chunk to synthesize.
            is_end (bool, optional): Whether this is the last chunk.
        """
        if not self.synthesizer or not self.is_ready:
            self._log_debug("TTS未初始化完成,无法发送文本")
            self.synthesis_complete = True
            return

        # The flag must be cleared before the (possibly slow) reconnect below:
        # a playback thread started just before this call stops as soon as it
        # sees the flag set together with an empty queue.
        self.synthesis_complete = False

        try:
            if not self.synthesizer.ws or self.synthesizer.status in self._WS_TERMINAL:
                self._log_debug("WebSocket连接已关闭,正在重新启动合成器...")
                self.synthesizer = self._create_synthesizer()
                self.synthesizer.start()
                if not self._wait_ws_open(5):
                    self._log_debug("WebSocket连接未打开,无法发送文本")
                    self.synthesis_complete = True
                    return
                self._log_debug("WebSocket连接已重新打开")
            elif self.synthesizer.status != self._WS_OPENED:
                self._log_debug("WebSocket连接未完全打开,等待...")
                if not self._wait_ws_open(2):
                    self._log_debug("WebSocket连接未打开,无法发送文本")
                    self.synthesis_complete = True
                    return

            # Close the byte count of the previous chunk before starting a new one.
            if self.listener and self.listener.current_chunk_bytes > 0:
                self.listener.text_chunk_bytes.append(self.listener.current_chunk_bytes)
                self.listener.current_chunk_bytes = 0

            self.synthesizer.process(text)
            if is_end:
                self.synthesizer.complete()
                self._log_debug("已发送合成完成指令")
        except Exception as e:
            self._log_debug(f"发送文本失败|错误: {e}")
            self.synthesis_complete = True

    @staticmethod
    def _resample_pcm16(samples, src_rate, dst_rate):
        """Linearly resample mono int16 samples from ``src_rate`` to ``dst_rate``.

        Each buffer is resampled on its own, which is an approximation at
        buffer boundaries but keeps speed and pitch correct.
        """
        if src_rate == dst_rate or len(samples) == 0:
            return samples
        out_len = max(1, int(round(len(samples) * dst_rate / src_rate)))
        positions = np.linspace(0, len(samples) - 1, num=out_len)
        resampled = np.interp(positions, np.arange(len(samples)), samples)
        return np.rint(resampled).astype(np.int16)

    def _prepare_pcm(self, audio_data, device_rate):
        """Apply gain and, if the device rate differs, resampling to a PCM buffer."""
        needs_gain = self.volume != 1.0
        needs_resample = device_rate != self.rate and self.channels == 1
        if self.audio_format != pyaudio.paInt16 or not (needs_gain or needs_resample):
            return audio_data

        samples = np.frombuffer(audio_data, dtype=np.int16)
        if needs_gain:
            samples = (samples * self.volume).astype(np.int16)
        if needs_resample:
            samples = self._resample_pcm16(samples, self.rate, device_rate)
        return samples.tobytes()

    def _select_output_rate(self):
        """Return the first sample rate the output device accepts, or None."""
        for rate in (self.rate, *self._FALLBACK_RATES):
            try:
                # Open and close a throw-away stream to probe the rate.
                probe = self.p.open(
                    format=self.audio_format,
                    channels=self.channels,
                    rate=rate,
                    output=True,
                    frames_per_buffer=1024
                )
                probe.close()
                return rate
            except Exception as e:
                self._log_debug(f"采样率 {rate}Hz 不受支持|错误: {e}")
        return None

    def _drain_audio_queue(self, device_rate):
        """Write queued audio to the stream until playback should stop."""
        while is_running and (self.is_playing or not self.audio_queue.empty() or not self.synthesis_complete):
            try:
                audio_data = self.loop.run_until_complete(
                    asyncio.wait_for(self.audio_queue.get(), timeout=0.5)
                )
                self.playback_stream.write(self._prepare_pcm(audio_data, device_rate))
                self.audio_queue.task_done()
            except asyncio.TimeoutError:
                if self.synthesis_complete and self.audio_queue.empty():
                    break
                continue
            except asyncio.CancelledError:
                break
            except Exception as e:
                self._log_debug(f"播放错误|错误: {e}")
                break

    def _close_playback_stream(self):
        """Stop and close the output stream, ignoring (but logging) errors."""
        if not self.playback_stream:
            return
        try:
            self.playback_stream.stop_stream()
            self.playback_stream.close()
        except Exception as e:
            self._log_debug(f"关闭音频流时发生错误|错误: {e}")
        finally:
            self.playback_stream = None

    def _playback_worker(self):
        """Body of the playback thread; fires the end callback exactly once."""
        self.is_playing = True
        if self.on_play_start:
            threading.Thread(target=self.on_play_start, daemon=True).start()
        self.playback_thread = threading.current_thread()

        try:
            if self.loop is None or self.audio_queue is None:
                self._log_debug("音频队列未初始化,无法播放音频")
                return
            asyncio.set_event_loop(self.loop)

            if not self.p:
                self._init_pyaudio()
            if not self.p:
                self._log_debug("没有可用的PyAudio实例,无法播放音频")
                return

            selected_rate = self._select_output_rate()
            if selected_rate is None:
                self._log_debug("没有找到支持的采样率")
                return
            self._log_debug(f"使用采样率: {selected_rate}Hz")

            self.playback_stream = self.p.open(
                format=self.audio_format,
                channels=self.channels,
                rate=selected_rate,
                output=True
            )
            self._drain_audio_queue(selected_rate)
            self._log_debug("播放循环已退出")
        finally:
            # Single exit point: every path above ends here, so the end
            # callback is fired once no matter how playback stopped.
            self._close_playback_stream()
            self.is_playing = False
            # Mark the current chunk as finished so subtitle sync can wrap up.
            self.mark_chunk_played()
            if self.on_play_end:
                threading.Thread(target=self.on_play_end, daemon=True).start()
            self._log_debug("播放已停止")

    def start_playback(self):
        """Start a daemon thread that plays the synthesized audio."""
        threading.Thread(target=self._playback_worker, daemon=True).start()

    def stop_playback(self):
        """Stop playback, drop queued audio and reset the chunk state."""
        if not self.is_playing:
            self._log_debug("播放已停止或未开始")
            return

        try:
            self._log_debug("尝试停止TTS播放")
            self.is_playing = False

            queue = self.audio_queue
            while queue is not None and not queue.empty():
                try:
                    queue.get_nowait()
                    queue.task_done()
                except (asyncio.QueueEmpty, ValueError):
                    break

            # Tell a still-running synthesis to finish.
            if self.synthesizer and not self.synthesis_complete:
                self.synthesizer.complete()

            with self.state_lock:
                self.current_chunk_index = 0
                self.text_chunk_durations = []

            # Mark synthesis complete right away so the playback loop exits.
            self.synthesis_complete = True
            self._log_debug("TTS播放已停止")
        except Exception as e:
            self._log_debug(f"停止播放失败|错误: {e}")

    def _join_playback_thread(self, timeout):
        """Wait up to ``timeout`` seconds for the playback thread to finish."""
        worker = self.playback_thread
        if worker is not None and worker is not threading.current_thread() and worker.is_alive():
            worker.join(timeout)

    def _close_event_loop(self):
        """Close the event loop once nothing is running on it.

        The loop is only ever driven by ``run_until_complete`` calls on the
        playback thread, so it can be closed directly when it is idle.
        """
        loop = self.loop
        if loop is None or loop.is_closed():
            return
        if loop.is_running():
            loop.call_soon_threadsafe(loop.stop)
            self._join_playback_thread(timeout=1.0)
        if not loop.is_running():
            loop.close()

    def cleanup(self):
        """Release the synthesizer, event loop and PyAudio resources."""
        global _ai_tts_instance

        self._log_debug("正在清理TTS资源")

        try:
            self.stop_playback()

            if self.synthesizer:
                self.synthesizer.wait()
                self.synthesizer = None

            # The playback thread owns the stream and drives the loop; let it
            # finish before the loop and PyAudio are torn down.
            self._join_playback_thread(timeout=2.0)
            self._close_event_loop()

            if self.p:
                try:
                    self.p.terminate()
                except Exception:
                    pass
                finally:
                    self.p = None

            self.is_ready = False
            self.listener = None

            self._log_debug("TTS资源清理完成")

            # Drop the module-level reference if it points at this instance.
            if _ai_tts_instance is self:
                _ai_tts_instance = None

        except Exception as e:
            self._log_debug(f"清理TTS资源失败|错误: {e}")

    def blocking_synthesize_and_play(self, text, volume=None, rate=1.0, voice_type=1, platform="qcloud"):
        """Synthesize ``text`` and block until playback ends or 60 s pass.

        Any ``on_play_end`` callback registered beforehand is put back when
        the call returns.

        Args:
            text (str): Text to synthesize.
            volume (float, optional): Temporary volume in 0.0-1.0; None keeps
                the current setting.
            rate (float, optional): Accepted for interface compatibility;
                currently not used.
            voice_type (int, optional): Accepted for interface compatibility;
                currently not used.
            platform (str, optional): Accepted for interface compatibility;
                currently not used.

        Returns:
            bool: True when playback finished, False on timeout or error.
        """
        self._log_debug(f"开始阻塞式TTS合成与播放|文本: {text[:50]}...")

        original_volume = self.volume
        volume_changed = False
        previous_on_play_end = self.on_play_end

        try:
            if volume is not None:
                self.set_volume(volume)
                volume_changed = True

            play_completed_event = threading.Event()
            self.on_play_end = play_completed_event.set

            if not self.is_playing:
                self.start_playback()

            self.send_text_chunk(text, is_end=True)

            completed = play_completed_event.wait(timeout=60)
            if completed:
                self._log_debug("阻塞式播放已完成")
            else:
                self._log_debug("阻塞式播放超时")
            return completed

        except Exception as e:
            self._log_debug(f"阻塞式TTS合成与播放失败|错误: {e}")
            self.stop_playback()
            return False

        finally:
            # Restore the volume only when it was explicitly overridden.
            if volume_changed:
                self.set_volume(original_volume)
            self.on_play_end = previous_on_play_end
718
+
719
+
720
def init(secret_id, secret_key, app_id, volume=1) -> bool:
    """Create and initialize the module-wide TTS engine.

    An existing engine is cleaned up first. The new engine only becomes the
    module-wide instance after its own initialization succeeded, so a failed
    call never leaves a half-initialized engine behind for ``speak()``.

    Args:
        secret_id (str): Tencent Cloud API secret ID.
        secret_key (str): Tencent Cloud API secret key.
        app_id (str): Tencent Cloud application ID.
        volume (float, optional): Initial volume in 0.0-1.0, default 1.

    Returns:
        bool: True on success, False on failure.
    """
    global _ai_tts_instance

    if DEBUG_MODE:
        print(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}|初始化TTS引擎|app_id: {app_id}")

    try:
        if _ai_tts_instance is not None:
            if DEBUG_MODE:
                print(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}|TTS实例已存在,先清理资源")
            _ai_tts_instance.cleanup()
            _ai_tts_instance = None

        engine = TencentSpeechSynthesizer(app_id, secret_id, secret_key)

        if volume != 1:
            engine.set_volume(volume)

        # The engine's own init() performs the real initialization.
        if not engine.init():
            if DEBUG_MODE:
                print(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}|TTS引擎内部初始化失败")
            engine.cleanup()
            _ai_tts_instance = None
            return False

        _ai_tts_instance = engine
        if DEBUG_MODE:
            print(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}|TTS引擎初始化成功")
        return True

    except Exception as e:
        if DEBUG_MODE:
            print(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}|TTS引擎初始化失败|错误: {e}")
            import traceback
            traceback.print_exc()
        _ai_tts_instance = None
        return False
765
+
766
+
767
def speak(text: str, is_end: bool = True) -> None:
    """Synthesize ``text`` and play it without blocking.

    Starts the playback thread when nothing is playing yet, then hands the
    text to the engine.

    Args:
        text (str): Text to synthesize.
        is_end (bool, optional): Whether this is the final text chunk,
            default True.
    """
    if _ai_tts_instance is None:
        print("TTS未初始化,请先调用 init() 函数")
        return

    try:
        if DEBUG_MODE:
            stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            print(f"{stamp}|开始TTS合成与播放|文本: {text[:50]}...")

        # Playback has to be running before audio starts to arrive.
        already_playing = _ai_tts_instance.is_playing
        if not already_playing:
            _ai_tts_instance.start_playback()

        _ai_tts_instance.send_text_chunk(text, is_end=is_end)
    except Exception as exc:
        if DEBUG_MODE:
            stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            print(f"{stamp}|TTS播放失败|错误: {exc}")
794
+
795
+
796
def speak_blocking(text: str, volume: float = None) -> bool:
    """Synthesize ``text`` and block until playback has finished.

    The optional volume override is passed on to the engine, which applies
    it for this call and restores the previous volume in its own ``finally``
    block, so the override cannot leak even if the call is interrupted.

    Args:
        text (str): Text to synthesize.
        volume (float, optional): Volume in 0.0-1.0 for this call only;
            None keeps the current setting.

    Returns:
        bool: Whether playback succeeded.
    """
    global _ai_tts_instance

    if _ai_tts_instance is None:
        print("TTS未初始化,请先调用 init() 函数")
        return False

    try:
        return _ai_tts_instance.blocking_synthesize_and_play(text, volume=volume)
    except Exception as e:
        if DEBUG_MODE:
            print(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}|阻塞式TTS播放失败|错误: {e}")
        return False
832
+
833
+
834
def stop() -> None:
    """Stop the current TTS playback, if an engine exists.

    Returns:
        None
    """
    engine = _ai_tts_instance
    if engine is None:
        print("TTS未初始化")
        return

    if DEBUG_MODE:
        stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        print(f"{stamp}|停止TTS播放")

    engine.stop_playback()
850
+
851
+
852
def set_volume(volume: float) -> bool:
    """Set the volume of the module-wide TTS engine.

    Args:
        volume (float): Volume in the range 0.0-1.0.

    Returns:
        bool: Whether the volume was applied; False when no engine exists.
    """
    engine = _ai_tts_instance
    if engine is None:
        print("TTS未初始化,请先调用 init() 函数")
        return False

    # The engine reports (success, applied_volume); only the flag is exposed.
    applied_ok, _applied_level = engine.set_volume(volume)
    return applied_ok
869
+
870
+
871
def is_speaking() -> bool:
    """Report whether the TTS engine is currently playing.

    Returns:
        bool: The engine's playing state, or False when no engine exists.
    """
    engine = _ai_tts_instance
    return False if engine is None else engine.is_playing
883
+
884
def set_playback_callback(on_play_start=None, on_play_end=None) -> None:
    """Register playback callbacks on the module-wide TTS engine.

    Does nothing when no engine has been created.

    Args:
        on_play_start (callable, optional): Called when playback starts.
        on_play_end (callable, optional): Called when playback ends.
    """
    engine = _ai_tts_instance
    if engine is None:
        return
    engine.set_playback_callback(on_play_start, on_play_end)
895
+
896
+
897
def release() -> None:
    """Clean up the module-wide TTS engine and drop the reference to it.

    Returns:
        None
    """
    global _ai_tts_instance

    if _ai_tts_instance is None:
        if DEBUG_MODE:
            stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            print(f"{stamp}|TTS实例不存在")
        return

    if DEBUG_MODE:
        stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        print(f"{stamp}|释放TTS资源")
    _ai_tts_instance.cleanup()
    _ai_tts_instance = None
913
+
914
+
915
# Simple manual test when this file is run directly:
#   python ai_tts.py <app_id> <secret_id> <secret_key> [text]
if __name__ == "__main__":
    import sys

    if len(sys.argv) < 4:
        print("用法: python ai_tts.py <app_id> <secret_id> <secret_key> [文本]")
        sys.exit(1)

    app_id, secret_id, secret_key = sys.argv[1:4]
    text = sys.argv[4] if len(sys.argv) > 4 else "欢迎使用腾讯云TTS服务"

    # Initialize the TTS engine
    if not init(secret_id, secret_key, app_id):
        print("TTS初始化失败")
        sys.exit(1)

    # Play the text and always release the engine afterwards, even when
    # playback is interrupted.
    print(f"开始播放文本: {text}")
    try:
        played = speak_blocking(text)
    finally:
        release()

    # Report the real outcome instead of unconditionally claiming success.
    print("播放完成" if played else "播放失败")
    sys.exit(0 if played else 1)