smartpi 1.1.3__py3-none-any.whl → 1.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- smartpi/__init__.pyc +0 -0
- smartpi/_gui.pyc +0 -0
- smartpi/ai_asr.pyc +0 -0
- smartpi/ai_llm.pyc +0 -0
- smartpi/ai_tts.pyc +0 -0
- smartpi/ai_vad.pyc +0 -0
- smartpi/audio.pyc +0 -0
- smartpi/base_driver.pyc +0 -0
- smartpi/camera.pyc +0 -0
- smartpi/color_sensor.pyc +0 -0
- smartpi/cw2015.pyc +0 -0
- smartpi/flash.pyc +0 -0
- smartpi/humidity.pyc +0 -0
- smartpi/led.pyc +0 -0
- smartpi/light_sensor.pyc +0 -0
- smartpi/local_model.pyc +0 -0
- smartpi/mcp_client.pyc +0 -0
- smartpi/mcp_fastmcp.pyc +0 -0
- smartpi/mcp_intent_recognizer.pyc +0 -0
- smartpi/models/__init__.pyc +0 -0
- smartpi/models/snakers4_silero-vad/__init__.pyc +0 -0
- smartpi/models/snakers4_silero-vad/hubconf.pyc +0 -0
- smartpi/motor.pyc +0 -0
- smartpi/move.pyc +0 -0
- smartpi/onnx_hand_workflow.pyc +0 -0
- smartpi/onnx_image_workflow.pyc +0 -0
- smartpi/onnx_pose_workflow.pyc +0 -0
- smartpi/onnx_text_workflow.pyc +0 -0
- smartpi/onnx_voice_workflow.pyc +0 -0
- smartpi/posemodel/__init__.pyc +0 -0
- smartpi/posenet_utils.pyc +0 -0
- smartpi/rknn_hand_workflow.pyc +0 -0
- smartpi/rknn_image_workflow.pyc +0 -0
- smartpi/rknn_pose_workflow.pyc +0 -0
- smartpi/rknn_text_workflow.pyc +0 -0
- smartpi/rknn_voice_workflow.pyc +0 -0
- smartpi/servo.pyc +0 -0
- smartpi/temperature.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/__init__.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/asr/__init__.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/asr/flash_recognizer.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/asr/speech_recognizer.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/common/__init__.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/common/credential.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/common/log.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/common/utils.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/soe/__init__.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/soe/speaking_assessment.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/tts/__init__.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/tts/flowing_speech_synthesizer.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/tts/speech_synthesizer.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/tts/speech_synthesizer_ws.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/vc/__init__.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/vc/speech_convertor_ws.pyc +0 -0
- smartpi/text_gte_model/__init__.pyc +0 -0
- smartpi/text_gte_model/config/__init__.pyc +0 -0
- smartpi/text_gte_model/gte/__init__.pyc +0 -0
- smartpi/touch_sensor.pyc +0 -0
- smartpi/trace.pyc +0 -0
- smartpi/ultrasonic.pyc +0 -0
- {smartpi-1.1.3.dist-info → smartpi-1.1.4.dist-info}/METADATA +1 -1
- smartpi-1.1.4.dist-info/RECORD +77 -0
- smartpi/__init__.py +0 -8
- smartpi/_gui.py +0 -66
- smartpi/ai_asr.py +0 -1037
- smartpi/ai_llm.py +0 -934
- smartpi/ai_tts.py +0 -938
- smartpi/ai_vad.py +0 -83
- smartpi/audio.py +0 -125
- smartpi/base_driver.py +0 -618
- smartpi/camera.py +0 -84
- smartpi/color_sensor.py +0 -18
- smartpi/cw2015.py +0 -179
- smartpi/flash.py +0 -130
- smartpi/humidity.py +0 -20
- smartpi/led.py +0 -19
- smartpi/light_sensor.py +0 -72
- smartpi/local_model.py +0 -432
- smartpi/mcp_client.py +0 -100
- smartpi/mcp_fastmcp.py +0 -322
- smartpi/mcp_intent_recognizer.py +0 -408
- smartpi/models/__init__.py +0 -0
- smartpi/models/snakers4_silero-vad/__init__.py +0 -0
- smartpi/models/snakers4_silero-vad/hubconf.py +0 -56
- smartpi/motor.py +0 -177
- smartpi/move.py +0 -218
- smartpi/onnx_hand_workflow.py +0 -201
- smartpi/onnx_image_workflow.py +0 -176
- smartpi/onnx_pose_workflow.py +0 -482
- smartpi/onnx_text_workflow.py +0 -173
- smartpi/onnx_voice_workflow.py +0 -437
- smartpi/posemodel/__init__.py +0 -0
- smartpi/posenet_utils.py +0 -222
- smartpi/rknn_hand_workflow.py +0 -245
- smartpi/rknn_image_workflow.py +0 -405
- smartpi/rknn_pose_workflow.py +0 -592
- smartpi/rknn_text_workflow.py +0 -240
- smartpi/rknn_voice_workflow.py +0 -394
- smartpi/servo.py +0 -178
- smartpi/temperature.py +0 -18
- smartpi/tencentcloud-speech-sdk-python/__init__.py +0 -1
- smartpi/tencentcloud-speech-sdk-python/asr/__init__.py +0 -0
- smartpi/tencentcloud-speech-sdk-python/asr/flash_recognizer.py +0 -178
- smartpi/tencentcloud-speech-sdk-python/asr/speech_recognizer.py +0 -311
- smartpi/tencentcloud-speech-sdk-python/common/__init__.py +0 -1
- smartpi/tencentcloud-speech-sdk-python/common/credential.py +0 -6
- smartpi/tencentcloud-speech-sdk-python/common/log.py +0 -16
- smartpi/tencentcloud-speech-sdk-python/common/utils.py +0 -7
- smartpi/tencentcloud-speech-sdk-python/soe/__init__.py +0 -0
- smartpi/tencentcloud-speech-sdk-python/soe/speaking_assessment.py +0 -276
- smartpi/tencentcloud-speech-sdk-python/tts/__init__.py +0 -0
- smartpi/tencentcloud-speech-sdk-python/tts/flowing_speech_synthesizer.py +0 -294
- smartpi/tencentcloud-speech-sdk-python/tts/speech_synthesizer.py +0 -144
- smartpi/tencentcloud-speech-sdk-python/tts/speech_synthesizer_ws.py +0 -234
- smartpi/tencentcloud-speech-sdk-python/vc/__init__.py +0 -0
- smartpi/tencentcloud-speech-sdk-python/vc/speech_convertor_ws.py +0 -237
- smartpi/text_gte_model/__init__.py +0 -0
- smartpi/text_gte_model/config/__init__.py +0 -0
- smartpi/text_gte_model/gte/__init__.py +0 -0
- smartpi/touch_sensor.py +0 -16
- smartpi/trace.py +0 -120
- smartpi/ultrasonic.py +0 -20
- smartpi-1.1.3.dist-info/RECORD +0 -77
- {smartpi-1.1.3.dist-info → smartpi-1.1.4.dist-info}/WHEEL +0 -0
- {smartpi-1.1.3.dist-info → smartpi-1.1.4.dist-info}/top_level.txt +0 -0
smartpi/ai_asr.py
DELETED
|
@@ -1,1037 +0,0 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
import time
|
|
3
|
-
|
|
4
|
-
import threading
|
|
5
|
-
from datetime import datetime
|
|
6
|
-
import json
|
|
7
|
-
import os
|
|
8
|
-
import pyaudio
|
|
9
|
-
import wave
|
|
10
|
-
from threading import Event
|
|
11
|
-
import sys
|
|
12
|
-
import os
|
|
13
|
-
|
|
14
|
-
# 获取当前文件所在目录的绝对路径
|
|
15
|
-
current_dir = os.path.dirname(os.path.abspath(__file__))
|
|
16
|
-
# 添加SDK路径(使用绝对路径)
|
|
17
|
-
sys.path.append(os.path.join(current_dir, "tencentcloud-speech-sdk-python"))
|
|
18
|
-
from common import credential
|
|
19
|
-
from asr import speech_recognizer
|
|
20
|
-
|
|
21
|
-
# 延迟导入VAD模块,减少启动时间
|
|
22
|
-
ai_vad = None
|
|
23
|
-
|
|
24
|
-
# 调试日志开关
|
|
25
|
-
DEBUG_MODE = False # 设置为True开启详细调试日志
|
|
26
|
-
|
|
27
|
-
# 全局AI语音识别对象
|
|
28
|
-
_ai_asr_instance = None
|
|
29
|
-
|
|
30
|
-
class Connection:
|
|
31
|
-
"""用于存储VAD状态的连接类"""
|
|
32
|
-
|
|
33
|
-
def __init__(self):
|
|
34
|
-
"""初始化Connection对象"""
|
|
35
|
-
self.client_voice_stop = False # 语音是否停止的标志
|
|
36
|
-
|
|
37
|
-
def reset_vad_states(self) -> None:
|
|
38
|
-
"""重置VAD状态
|
|
39
|
-
|
|
40
|
-
返回值:
|
|
41
|
-
None
|
|
42
|
-
"""
|
|
43
|
-
self.client_voice_stop = False
|
|
44
|
-
|
|
45
|
-
# 音频参数设置
|
|
46
|
-
FORMAT = pyaudio.paInt16 # 音频格式,16位PCM
|
|
47
|
-
CHANNELS = 1 # 单声道
|
|
48
|
-
RATE = 16000 # 采样率16kHz
|
|
49
|
-
CHUNK = 1024 # 每次读取的音频块大小
|
|
50
|
-
SLICE_SIZE = 6400 # SDK要求的分片大小
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
class MicrophoneSpeechListener(speech_recognizer.SpeechRecognitionListener):
|
|
54
|
-
"""语音识别回调监听器,处理并存储识别结果"""
|
|
55
|
-
|
|
56
|
-
def __init__(self, sentence_callback=None):
|
|
57
|
-
"""初始化MicrophoneSpeechListener对象
|
|
58
|
-
|
|
59
|
-
参数:
|
|
60
|
-
sentence_callback (callable, optional): 句子识别完成后的回调函数,接收一个字符串参数
|
|
61
|
-
"""
|
|
62
|
-
self.sentence_callback = sentence_callback # 句子识别完成后的回调函数
|
|
63
|
-
self.final_result = "" # 最终识别结果
|
|
64
|
-
self.interim_result = "" # 中间识别结果
|
|
65
|
-
self.is_recognizing = False # 是否正在识别的标志
|
|
66
|
-
self.lock = threading.Lock() # 添加线程锁,保护共享变量的读写
|
|
67
|
-
self.timestamps = {} # 用于跟踪各个步骤的时间戳,用于调试
|
|
68
|
-
|
|
69
|
-
def on_recognition_start(self, response: dict) -> None:
|
|
70
|
-
"""识别开始时的回调
|
|
71
|
-
|
|
72
|
-
参数:
|
|
73
|
-
response (dict): 包含识别开始信息的响应字典
|
|
74
|
-
|
|
75
|
-
返回值:
|
|
76
|
-
None
|
|
77
|
-
"""
|
|
78
|
-
with self.lock:
|
|
79
|
-
self.is_recognizing = True
|
|
80
|
-
|
|
81
|
-
# 记录开始时间和调试信息
|
|
82
|
-
start_time = time.time()
|
|
83
|
-
self.timestamps['recognition_start'] = start_time
|
|
84
|
-
|
|
85
|
-
if DEBUG_MODE:
|
|
86
|
-
voice_id = response.get('voice_id', '未知')
|
|
87
|
-
print(f"\n{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}|识别开始|voice_id: {voice_id}|耗时: 0.00ms")
|
|
88
|
-
|
|
89
|
-
def on_sentence_begin(self, response: dict) -> None:
|
|
90
|
-
"""句子开始时的回调
|
|
91
|
-
|
|
92
|
-
参数:
|
|
93
|
-
response (dict): 包含句子开始信息的响应字典
|
|
94
|
-
|
|
95
|
-
返回值:
|
|
96
|
-
None
|
|
97
|
-
"""
|
|
98
|
-
# 记录时间和计算耗时
|
|
99
|
-
current_time = time.time()
|
|
100
|
-
self.timestamps['sentence_begin'] = current_time
|
|
101
|
-
|
|
102
|
-
# 计算从识别开始到句子开始的耗时
|
|
103
|
-
if 'recognition_start' in self.timestamps:
|
|
104
|
-
elapsed = (current_time - self.timestamps['recognition_start']) * 1000
|
|
105
|
-
else:
|
|
106
|
-
elapsed = 0.0
|
|
107
|
-
|
|
108
|
-
if DEBUG_MODE:
|
|
109
|
-
rsp_str = json.dumps(response, ensure_ascii=False)
|
|
110
|
-
print(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}|句子开始|耗时: {elapsed:.2f}ms|{rsp_str}")
|
|
111
|
-
|
|
112
|
-
def on_recognition_result_change(self, response: dict) -> None:
|
|
113
|
-
"""识别结果变化时的回调
|
|
114
|
-
|
|
115
|
-
参数:
|
|
116
|
-
response (dict): 包含最新识别结果的响应字典
|
|
117
|
-
|
|
118
|
-
返回值:
|
|
119
|
-
None
|
|
120
|
-
"""
|
|
121
|
-
# 记录时间和计算耗时
|
|
122
|
-
current_time = time.time()
|
|
123
|
-
self.timestamps['result_change'] = current_time
|
|
124
|
-
|
|
125
|
-
# 计算从句子开始到结果变化的耗时
|
|
126
|
-
if 'sentence_begin' in self.timestamps:
|
|
127
|
-
elapsed = (current_time - self.timestamps['sentence_begin']) * 1000
|
|
128
|
-
else:
|
|
129
|
-
elapsed = 0.0
|
|
130
|
-
|
|
131
|
-
if DEBUG_MODE:
|
|
132
|
-
rsp_str = json.dumps(response, ensure_ascii=False)
|
|
133
|
-
print(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}|结果更新|耗时: {elapsed:.2f}ms|{rsp_str}")
|
|
134
|
-
|
|
135
|
-
# 提取中间结果
|
|
136
|
-
if "result" in response and "voice_text_str" in response["result"]:
|
|
137
|
-
with self.lock:
|
|
138
|
-
self.interim_result = response["result"]["voice_text_str"]
|
|
139
|
-
# 打印实时结果,不换行(此为功能输出,不受DEBUG_MODE控制)
|
|
140
|
-
print(f"\r识别中: {self.interim_result}", end="")
|
|
141
|
-
elif not DEBUG_MODE: # 只有在非调试模式下才打印非结果更新的响应
|
|
142
|
-
print(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}|结果更新|{rsp_str}")
|
|
143
|
-
|
|
144
|
-
def on_sentence_end(self, response: dict) -> None:
|
|
145
|
-
"""句子结束时的回调
|
|
146
|
-
|
|
147
|
-
参数:
|
|
148
|
-
response (dict): 包含句子结束信息的响应字典
|
|
149
|
-
|
|
150
|
-
返回值:
|
|
151
|
-
None
|
|
152
|
-
"""
|
|
153
|
-
# 记录时间和计算耗时
|
|
154
|
-
current_time = time.time()
|
|
155
|
-
self.timestamps['sentence_end'] = current_time
|
|
156
|
-
|
|
157
|
-
# 计算从句子开始到句子结束的耗时
|
|
158
|
-
if 'sentence_begin' in self.timestamps:
|
|
159
|
-
elapsed = (current_time - self.timestamps['sentence_begin']) * 1000
|
|
160
|
-
else:
|
|
161
|
-
elapsed = 0.0
|
|
162
|
-
|
|
163
|
-
if DEBUG_MODE:
|
|
164
|
-
rsp_str = json.dumps(response, ensure_ascii=False)
|
|
165
|
-
print(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}|句子结束|耗时: {elapsed:.2f}ms|{rsp_str}")
|
|
166
|
-
|
|
167
|
-
# 提取最终结果
|
|
168
|
-
if "result" in response and "voice_text_str" in response["result"]:
|
|
169
|
-
sentence = response["result"]["voice_text_str"]
|
|
170
|
-
with self.lock:
|
|
171
|
-
self.final_result += sentence
|
|
172
|
-
|
|
173
|
-
# 打印句子识别完成信息(此为功能输出,不受DEBUG_MODE控制)
|
|
174
|
-
print(f"\n句子识别完成: {sentence}")
|
|
175
|
-
|
|
176
|
-
# 调用回调函数处理这个句子
|
|
177
|
-
if self.sentence_callback:
|
|
178
|
-
# 在新线程中调用回调,避免阻塞识别过程
|
|
179
|
-
threading.Thread(target=self.sentence_callback, args=(sentence,), daemon=True).start()
|
|
180
|
-
elif not DEBUG_MODE: # 只有在非调试模式下才打印非结果的响应
|
|
181
|
-
print(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}|句子结束|{rsp_str}")
|
|
182
|
-
|
|
183
|
-
def on_recognition_complete(self, response: dict) -> None:
|
|
184
|
-
"""识别完成时的回调
|
|
185
|
-
|
|
186
|
-
参数:
|
|
187
|
-
response (dict): 包含识别完成信息的响应字典
|
|
188
|
-
|
|
189
|
-
返回值:
|
|
190
|
-
None
|
|
191
|
-
"""
|
|
192
|
-
with self.lock:
|
|
193
|
-
self.is_recognizing = False
|
|
194
|
-
|
|
195
|
-
# 记录时间和计算耗时
|
|
196
|
-
current_time = time.time()
|
|
197
|
-
|
|
198
|
-
# 计算总耗时
|
|
199
|
-
if 'recognition_start' in self.timestamps:
|
|
200
|
-
total_elapsed = (current_time - self.timestamps['recognition_start']) * 1000
|
|
201
|
-
else:
|
|
202
|
-
total_elapsed = 0.0
|
|
203
|
-
|
|
204
|
-
if DEBUG_MODE:
|
|
205
|
-
voice_id = response.get('voice_id', '未知')
|
|
206
|
-
print(f"\n{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}|识别完成|voice_id: {voice_id}|总耗时: {total_elapsed:.2f}ms")
|
|
207
|
-
print(f"最终识别结果: {self.final_result}")
|
|
208
|
-
|
|
209
|
-
def on_fail(self, response: dict) -> None:
|
|
210
|
-
"""识别失败时的回调
|
|
211
|
-
|
|
212
|
-
参数:
|
|
213
|
-
response (dict): 包含识别失败信息的响应字典
|
|
214
|
-
|
|
215
|
-
返回值:
|
|
216
|
-
None
|
|
217
|
-
"""
|
|
218
|
-
with self.lock:
|
|
219
|
-
self.is_recognizing = False
|
|
220
|
-
|
|
221
|
-
# 记录时间和计算耗时
|
|
222
|
-
current_time = time.time()
|
|
223
|
-
|
|
224
|
-
# 计算总耗时
|
|
225
|
-
if 'recognition_start' in self.timestamps:
|
|
226
|
-
total_elapsed = (current_time - self.timestamps['recognition_start']) * 1000
|
|
227
|
-
else:
|
|
228
|
-
total_elapsed = 0.0
|
|
229
|
-
|
|
230
|
-
rsp_str = json.dumps(response, ensure_ascii=False)
|
|
231
|
-
print(f"\n{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}|识别失败|总耗时: {total_elapsed:.2f}ms|{rsp_str}")
|
|
232
|
-
|
|
233
|
-
# 处理错误码4008(客户端超过15秒未发送音频数据),重启语音识别
|
|
234
|
-
if 'code' in response and response['code'] == 4008:
|
|
235
|
-
print(f"\n{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}|检测到错误码4008,正在重启语音识别...")
|
|
236
|
-
# 获取全局ASR实例并重启
|
|
237
|
-
global _ai_asr_instance
|
|
238
|
-
if _ai_asr_instance:
|
|
239
|
-
try:
|
|
240
|
-
_ai_asr_instance.stop()
|
|
241
|
-
time.sleep(1) # 增加延迟时间,确保资源完全释放
|
|
242
|
-
_ai_asr_instance.start()
|
|
243
|
-
print(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}|语音识别已成功重启")
|
|
244
|
-
except Exception as e:
|
|
245
|
-
print(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}|语音识别重启失败: {e}")
|
|
246
|
-
import traceback
|
|
247
|
-
traceback.print_exc()
|
|
248
|
-
# 如果重启失败,尝试重新创建ASR实例
|
|
249
|
-
try:
|
|
250
|
-
print(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}|尝试重新创建ASR实例...")
|
|
251
|
-
# 重新初始化ASR实例
|
|
252
|
-
from smartpi.ai_asr import init
|
|
253
|
-
# 使用原有的初始化参数重新初始化
|
|
254
|
-
init(
|
|
255
|
-
appid=_ai_asr_instance.appid,
|
|
256
|
-
secret_id=_ai_asr_instance.secret_id,
|
|
257
|
-
secret_key=_ai_asr_instance.secret_key,
|
|
258
|
-
sentence_callback=_ai_asr_instance.listener.sentence_callback,
|
|
259
|
-
engine_model_type=_ai_asr_instance.engine_model_type
|
|
260
|
-
)
|
|
261
|
-
print(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}|ASR实例已重新创建")
|
|
262
|
-
except Exception as e2:
|
|
263
|
-
print(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}|重新创建ASR实例失败: {e2}")
|
|
264
|
-
import traceback
|
|
265
|
-
traceback.print_exc()
|
|
266
|
-
|
|
267
|
-
def get_final_result(self) -> str:
|
|
268
|
-
"""获取当前的最终识别结果
|
|
269
|
-
|
|
270
|
-
返回值:
|
|
271
|
-
str: 最终识别结果字符串
|
|
272
|
-
"""
|
|
273
|
-
with self.lock:
|
|
274
|
-
return self.final_result
|
|
275
|
-
|
|
276
|
-
def get_interim_result(self) -> str:
|
|
277
|
-
"""获取当前的中间识别结果
|
|
278
|
-
|
|
279
|
-
返回值:
|
|
280
|
-
str: 中间识别结果字符串
|
|
281
|
-
"""
|
|
282
|
-
with self.lock:
|
|
283
|
-
return self.interim_result
|
|
284
|
-
|
|
285
|
-
def clear_results(self) -> None:
|
|
286
|
-
"""清除当前的识别结果
|
|
287
|
-
|
|
288
|
-
返回值:
|
|
289
|
-
None
|
|
290
|
-
"""
|
|
291
|
-
start_time = time.time()
|
|
292
|
-
with self.lock:
|
|
293
|
-
self.final_result = ""
|
|
294
|
-
self.interim_result = ""
|
|
295
|
-
|
|
296
|
-
elapsed = (time.time() - start_time) * 1000
|
|
297
|
-
if DEBUG_MODE:
|
|
298
|
-
print(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}|清除结果|耗时: {elapsed:.2f}ms")
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
class TencentSpeechRecognizer:
|
|
302
|
-
"""腾讯云语音识别封装类,提供开始、停止、暂停和恢复的接口"""
|
|
303
|
-
|
|
304
|
-
def __init__(self, appid: str, secret_id: str, secret_key: str,
|
|
305
|
-
engine_model_type: str = "16k_zh",
|
|
306
|
-
sentence_callback=None, vad=None,
|
|
307
|
-
interruption_callback=None):
|
|
308
|
-
"""初始化TencentSpeechRecognizer对象
|
|
309
|
-
|
|
310
|
-
参数:
|
|
311
|
-
appid (str): 腾讯云应用ID
|
|
312
|
-
secret_id (str): 腾讯云Secret ID
|
|
313
|
-
secret_key (str): 腾讯云Secret Key
|
|
314
|
-
engine_model_type (str, optional): 引擎模型类型,默认"16k_zh"
|
|
315
|
-
sentence_callback (callable, optional): 句子识别完成后的回调函数
|
|
316
|
-
vad (object, optional): 语音活动检测(VAD)对象
|
|
317
|
-
interruption_callback (callable, optional): 打断回调函数
|
|
318
|
-
"""
|
|
319
|
-
self.appid = appid # 腾讯云应用ID
|
|
320
|
-
self.secret_id = secret_id # 腾讯云Secret ID
|
|
321
|
-
self.secret_key = secret_key # 腾讯云Secret Key
|
|
322
|
-
self.engine_model_type = engine_model_type # 引擎模型类型
|
|
323
|
-
|
|
324
|
-
# 初始化监听器,传入句子回调函数
|
|
325
|
-
self.listener = MicrophoneSpeechListener(sentence_callback)
|
|
326
|
-
self.credential = credential.Credential(self.secret_id, self.secret_key ) # 腾讯云凭证
|
|
327
|
-
self.recognizer = None # 语音识别器实例
|
|
328
|
-
self.capture_thread = None # 麦克风采集线程
|
|
329
|
-
self.stop_event = Event() # 停止事件
|
|
330
|
-
self.pause_event = Event() # 用于控制暂停状态的事件
|
|
331
|
-
self.is_running = False # 是否正在运行的标志
|
|
332
|
-
self.is_paused = False # 是否处于暂停状态的标志
|
|
333
|
-
|
|
334
|
-
# VAD相关
|
|
335
|
-
self.vad = vad # 语音活动检测(VAD)对象
|
|
336
|
-
self.connection = Connection() if vad else None # VAD状态连接对象
|
|
337
|
-
|
|
338
|
-
# 打断回调函数,当检测到用户说话且需要打断当前活动时调用
|
|
339
|
-
self.interruption_callback = interruption_callback
|
|
340
|
-
self.lock = threading.Lock() # 添加线程锁,保护共享状态变量的读写
|
|
341
|
-
|
|
342
|
-
def _microphone_audio_capture(self) -> None:
|
|
343
|
-
"""麦克风音频采集线程函数
|
|
344
|
-
|
|
345
|
-
返回值:
|
|
346
|
-
None
|
|
347
|
-
"""
|
|
348
|
-
p = pyaudio.PyAudio()
|
|
349
|
-
stream = None # 初始化流为None
|
|
350
|
-
audio_buffer = b"" # 音频缓冲区,在try块之前初始化,防止finally块引用未定义变量
|
|
351
|
-
try:
|
|
352
|
-
# 打开麦克风流(指定输入设备,避免默认设备错误)
|
|
353
|
-
stream = p.open(
|
|
354
|
-
format=FORMAT,
|
|
355
|
-
channels=CHANNELS,
|
|
356
|
-
rate=RATE,
|
|
357
|
-
input=True,
|
|
358
|
-
input_device_index=None, # 自动选择默认设备,可改为具体设备索引
|
|
359
|
-
frames_per_buffer=CHUNK
|
|
360
|
-
)
|
|
361
|
-
|
|
362
|
-
print("开始录音... 按Ctrl+C停止")
|
|
363
|
-
# 用于跟踪上次发送数据的时间,防止连接超时
|
|
364
|
-
last_send_time = time.time()
|
|
365
|
-
|
|
366
|
-
# 重置VAD状态
|
|
367
|
-
if self.connection:
|
|
368
|
-
self.connection.reset_vad_states()
|
|
369
|
-
|
|
370
|
-
while not self.stop_event.is_set():
|
|
371
|
-
# 检查是否处于暂停状态,如果是则等待
|
|
372
|
-
if self.pause_event.is_set():
|
|
373
|
-
self.is_paused = True
|
|
374
|
-
# 暂停时清空缓冲区,避免恢复后处理旧数据
|
|
375
|
-
audio_buffer = b""
|
|
376
|
-
# 重置VAD状态
|
|
377
|
-
if self.connection:
|
|
378
|
-
self.connection.reset_vad_states()
|
|
379
|
-
# 短暂休眠减少CPU占用
|
|
380
|
-
time.sleep(0.1)
|
|
381
|
-
continue
|
|
382
|
-
|
|
383
|
-
if self.is_paused:
|
|
384
|
-
# 从暂停状态恢复
|
|
385
|
-
self.is_paused = False
|
|
386
|
-
print("\n语音识别已恢复")
|
|
387
|
-
# 恢复时重新开始计时
|
|
388
|
-
last_send_time = time.time()
|
|
389
|
-
# 重置VAD状态
|
|
390
|
-
if self.connection:
|
|
391
|
-
self.connection.reset_vad_states()
|
|
392
|
-
|
|
393
|
-
try:
|
|
394
|
-
# 读取音频数据(添加超时和溢出处理)
|
|
395
|
-
data = stream.read(CHUNK, exception_on_overflow=False) # 禁止溢出时抛异常
|
|
396
|
-
|
|
397
|
-
# VAD检测
|
|
398
|
-
if self.vad and self.connection:
|
|
399
|
-
# 使用VAD检测语音
|
|
400
|
-
have_voice = self.vad.is_vad(self.connection, data)
|
|
401
|
-
|
|
402
|
-
# 检查语音是否结束
|
|
403
|
-
if self.connection.client_voice_stop:
|
|
404
|
-
print("\nVAD检测到语音结束")
|
|
405
|
-
# 发送剩余的音频数据
|
|
406
|
-
if len(audio_buffer) > 0 and self.recognizer:
|
|
407
|
-
try:
|
|
408
|
-
self.recognizer.write(audio_buffer)
|
|
409
|
-
audio_buffer = b""
|
|
410
|
-
# 发送尾包通知服务端识别结束
|
|
411
|
-
self.recognizer.write(b"")
|
|
412
|
-
last_send_time = time.time()
|
|
413
|
-
print("\n已发送尾包,通知服务端识别结束")
|
|
414
|
-
except Exception as e:
|
|
415
|
-
print(f"发送音频数据失败: {e}")
|
|
416
|
-
|
|
417
|
-
# 确保识别完成并获取结果
|
|
418
|
-
if self.listener and hasattr(self.listener, 'final_result') and self.listener.final_result:
|
|
419
|
-
final_result = self.listener.final_result
|
|
420
|
-
print(f"[VAD触发] 处理最终识别结果: {final_result}")
|
|
421
|
-
# 直接调用句子回调函数处理结果
|
|
422
|
-
if hasattr(self.listener, 'sentence_callback') and self.listener.sentence_callback:
|
|
423
|
-
threading.Thread(target=self.listener.sentence_callback, args=(final_result,), daemon=True).start()
|
|
424
|
-
# 重置监听器的结果
|
|
425
|
-
if self.listener:
|
|
426
|
-
self.listener.final_result = ""
|
|
427
|
-
self.listener.interim_result = ""
|
|
428
|
-
# 重新创建识别器准备下一次识别
|
|
429
|
-
self._recreate_recognizer()
|
|
430
|
-
# 重置VAD状态
|
|
431
|
-
self.connection.reset_vad_states()
|
|
432
|
-
# 短暂休眠避免频繁处理
|
|
433
|
-
time.sleep(0.01)
|
|
434
|
-
continue
|
|
435
|
-
|
|
436
|
-
# 只有在检测到语音时才添加到缓冲区
|
|
437
|
-
if have_voice:
|
|
438
|
-
audio_buffer += data
|
|
439
|
-
# 检查是否需要打断当前活动
|
|
440
|
-
if self.interruption_callback and hasattr(self.connection, 'client_have_voice') and self.connection.client_have_voice:
|
|
441
|
-
print("[VAD检测] 用户开始说话,检查是否需要打断")
|
|
442
|
-
# 立即调用打断回调函数
|
|
443
|
-
threading.Thread(target=self.interruption_callback, daemon=True).start()
|
|
444
|
-
else:
|
|
445
|
-
# 没有VAD时,直接添加到缓冲区
|
|
446
|
-
audio_buffer += data
|
|
447
|
-
|
|
448
|
-
current_time = time.time()
|
|
449
|
-
# 检查是否超过13秒未发送数据(提前2秒防止超时)
|
|
450
|
-
if current_time - last_send_time > 13:
|
|
451
|
-
if self.recognizer:
|
|
452
|
-
try:
|
|
453
|
-
if audio_buffer:
|
|
454
|
-
# 有数据则发送数据
|
|
455
|
-
self.recognizer.write(audio_buffer[:SLICE_SIZE])
|
|
456
|
-
audio_buffer = audio_buffer[SLICE_SIZE:]
|
|
457
|
-
else:
|
|
458
|
-
# 无数据则发送小量空数据保持连接活跃
|
|
459
|
-
self.recognizer.write(b"\x00" * 100)
|
|
460
|
-
print("\n长时间无数据发送,发送空数据包保持连接活跃")
|
|
461
|
-
last_send_time = current_time
|
|
462
|
-
except Exception as e:
|
|
463
|
-
print(f"发送音频数据失败: {e}")
|
|
464
|
-
# 尝试重新创建recognizer
|
|
465
|
-
if not self.stop_event.is_set() and not self.pause_event.is_set():
|
|
466
|
-
self._recreate_recognizer()
|
|
467
|
-
else:
|
|
468
|
-
# 当缓冲区达到分片大小时发送,或距离上次发送超过40ms时发送(实时率要求)
|
|
469
|
-
if len(audio_buffer) >= SLICE_SIZE or (current_time - last_send_time > 0.04 and audio_buffer):
|
|
470
|
-
if self.recognizer:
|
|
471
|
-
try:
|
|
472
|
-
self.recognizer.write(audio_buffer[:SLICE_SIZE])
|
|
473
|
-
last_send_time = current_time
|
|
474
|
-
audio_buffer = audio_buffer[SLICE_SIZE:]
|
|
475
|
-
except Exception as e:
|
|
476
|
-
print(f"发送音频数据失败: {e}")
|
|
477
|
-
# 尝试重新创建recognizer
|
|
478
|
-
if not self.stop_event.is_set() and not self.pause_event.is_set():
|
|
479
|
-
self._recreate_recognizer()
|
|
480
|
-
|
|
481
|
-
time.sleep(0.001)
|
|
482
|
-
except OSError as e:
|
|
483
|
-
# 处理输入溢出,清空缓冲区避免累积
|
|
484
|
-
if e.errno == -9981:
|
|
485
|
-
print("警告:音频输入溢出,已重置缓冲区")
|
|
486
|
-
audio_buffer = b""
|
|
487
|
-
else:
|
|
488
|
-
raise e
|
|
489
|
-
|
|
490
|
-
except OSError as e:
|
|
491
|
-
# 处理录音设备不可用错误
|
|
492
|
-
print(f"\n录音发生错误: {e}")
|
|
493
|
-
if "Device unavailable" in str(e):
|
|
494
|
-
print("检测到录音设备不可用,尝试延迟后重新初始化...")
|
|
495
|
-
time.sleep(2) # 延迟2秒后尝试重新初始化
|
|
496
|
-
if not self.stop_event.is_set():
|
|
497
|
-
print("尝试重新初始化录音设备...")
|
|
498
|
-
# 重新启动识别
|
|
499
|
-
self.stop()
|
|
500
|
-
time.sleep(1)
|
|
501
|
-
self.start()
|
|
502
|
-
except Exception as e:
|
|
503
|
-
print(f"\n录音发生错误: {e}")
|
|
504
|
-
import traceback
|
|
505
|
-
traceback.print_exc()
|
|
506
|
-
finally:
|
|
507
|
-
# 安全关闭流(检查流是否已打开)
|
|
508
|
-
if stream is not None:
|
|
509
|
-
try:
|
|
510
|
-
if stream.is_active():
|
|
511
|
-
stream.stop_stream()
|
|
512
|
-
stream.close()
|
|
513
|
-
except Exception:
|
|
514
|
-
pass # 忽略关闭流时的错误
|
|
515
|
-
|
|
516
|
-
try:
|
|
517
|
-
p.terminate()
|
|
518
|
-
except Exception:
|
|
519
|
-
pass # 忽略终止pyaudio时的错误
|
|
520
|
-
|
|
521
|
-
# 发送剩余数据
|
|
522
|
-
if len(audio_buffer) > 0 and self.recognizer and not self.pause_event.is_set() and not self.stop_event.is_set():
|
|
523
|
-
try:
|
|
524
|
-
self.recognizer.write(audio_buffer)
|
|
525
|
-
except Exception:
|
|
526
|
-
pass # 忽略发送数据时的错误
|
|
527
|
-
|
|
528
|
-
if self.recognizer:
|
|
529
|
-
try:
|
|
530
|
-
self.recognizer.stop()
|
|
531
|
-
except Exception:
|
|
532
|
-
pass # 忽略停止识别器时的错误
|
|
533
|
-
|
|
534
|
-
# 只有在不准备重新启动的情况下才设置is_running为False
|
|
535
|
-
if not hasattr(self, 'stop_event') or self.stop_event.is_set():
|
|
536
|
-
try:
|
|
537
|
-
self.is_running = False
|
|
538
|
-
except Exception:
|
|
539
|
-
pass
|
|
540
|
-
|
|
541
|
-
print("\n录音已停止")
|
|
542
|
-
|
|
543
|
-
def _recreate_recognizer(self) -> None:
|
|
544
|
-
"""重新创建识别器,用于处理连接断开的情况
|
|
545
|
-
|
|
546
|
-
返回值:
|
|
547
|
-
None
|
|
548
|
-
"""
|
|
549
|
-
start_time = time.time()
|
|
550
|
-
try:
|
|
551
|
-
# 先停止旧的识别器
|
|
552
|
-
if self.recognizer:
|
|
553
|
-
try:
|
|
554
|
-
self.recognizer.stop()
|
|
555
|
-
except Exception as e:
|
|
556
|
-
if DEBUG_MODE:
|
|
557
|
-
print(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}|停止旧识别器时发生错误: {e}")
|
|
558
|
-
finally:
|
|
559
|
-
self.recognizer = None # 确保旧识别器被释放
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
# 创建新的识别器
|
|
563
|
-
self.recognizer = speech_recognizer.SpeechRecognizer(
|
|
564
|
-
self.appid, self.credential, self.engine_model_type, self.listener)
|
|
565
|
-
|
|
566
|
-
# 配置识别参数
|
|
567
|
-
self.recognizer.set_filter_modal(1) # 过滤语气词
|
|
568
|
-
self.recognizer.set_filter_punc(1) # 过滤标点符号
|
|
569
|
-
self.recognizer.set_filter_dirty(1) # 过滤脏词
|
|
570
|
-
self.recognizer.set_need_vad(1) # 启用语音活动检测
|
|
571
|
-
self.recognizer.set_voice_format(1) # 音频格式为PCM
|
|
572
|
-
self.recognizer.set_word_info(1) # 返回词级别信息
|
|
573
|
-
self.recognizer.set_convert_num_mode(1) # 数字转换为中文
|
|
574
|
-
|
|
575
|
-
# 启动新的识别器
|
|
576
|
-
self.recognizer.start()
|
|
577
|
-
elapsed = (time.time() - start_time) * 1000
|
|
578
|
-
if DEBUG_MODE:
|
|
579
|
-
print(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}|重新创建识别器|耗时: {elapsed:.2f}ms")
|
|
580
|
-
else:
|
|
581
|
-
print("已重新创建语音识别连接")
|
|
582
|
-
except Exception as e:
|
|
583
|
-
elapsed = (time.time() - start_time) * 1000
|
|
584
|
-
self.recognizer = None # 确保识别器为空
|
|
585
|
-
import traceback
|
|
586
|
-
traceback.print_exc()
|
|
587
|
-
if DEBUG_MODE:
|
|
588
|
-
print(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}|重新创建识别器失败|耗时: {elapsed:.2f}ms|错误: {e}")
|
|
589
|
-
else:
|
|
590
|
-
print(f"重新创建识别器失败: {e}")
|
|
591
|
-
|
|
592
|
-
def start(self) -> None:
|
|
593
|
-
"""开始语音识别
|
|
594
|
-
|
|
595
|
-
返回值:
|
|
596
|
-
None
|
|
597
|
-
"""
|
|
598
|
-
with self.lock:
|
|
599
|
-
if self.is_running:
|
|
600
|
-
# 如果正在运行但处于暂停状态,则恢复
|
|
601
|
-
if self.is_paused:
|
|
602
|
-
self.resume()
|
|
603
|
-
if DEBUG_MODE:
|
|
604
|
-
print(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}|启动识别|状态: 识别已在运行中")
|
|
605
|
-
else:
|
|
606
|
-
print("识别已在运行中")
|
|
607
|
-
return
|
|
608
|
-
|
|
609
|
-
start_time = time.time()
|
|
610
|
-
# 重置状态
|
|
611
|
-
self.stop_event.clear()
|
|
612
|
-
self.pause_event.clear()
|
|
613
|
-
self.listener.clear_results()
|
|
614
|
-
with self.lock:
|
|
615
|
-
self.is_paused = False
|
|
616
|
-
|
|
617
|
-
# 创建识别器
|
|
618
|
-
try:
|
|
619
|
-
self._recreate_recognizer()
|
|
620
|
-
|
|
621
|
-
# 启动麦克风采集线程
|
|
622
|
-
self.capture_thread = threading.Thread(target=self._microphone_audio_capture)
|
|
623
|
-
self.capture_thread.start()
|
|
624
|
-
with self.lock:
|
|
625
|
-
self.is_running = True
|
|
626
|
-
|
|
627
|
-
elapsed = (time.time() - start_time) * 1000
|
|
628
|
-
if DEBUG_MODE:
|
|
629
|
-
print(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}|启动识别|耗时: {elapsed:.2f}ms|状态: 语音识别已启动")
|
|
630
|
-
else:
|
|
631
|
-
print("语音识别已启动")
|
|
632
|
-
|
|
633
|
-
except Exception as e:
|
|
634
|
-
elapsed = (time.time() - start_time) * 1000
|
|
635
|
-
if DEBUG_MODE:
|
|
636
|
-
print(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}|启动识别失败|耗时: {elapsed:.2f}ms|错误: {e}")
|
|
637
|
-
else:
|
|
638
|
-
print(f"启动识别发生错误: {e}")
|
|
639
|
-
with self.lock:
|
|
640
|
-
self.is_running = False
|
|
641
|
-
|
|
642
|
-
def stop(self) -> None:
|
|
643
|
-
"""停止语音识别
|
|
644
|
-
|
|
645
|
-
返回值:
|
|
646
|
-
None
|
|
647
|
-
"""
|
|
648
|
-
with self.lock:
|
|
649
|
-
if not self.is_running:
|
|
650
|
-
if DEBUG_MODE:
|
|
651
|
-
print(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}|停止识别|状态: 识别未在运行")
|
|
652
|
-
else:
|
|
653
|
-
print("识别未在运行中")
|
|
654
|
-
return
|
|
655
|
-
|
|
656
|
-
# 立即设置状态,防止其他线程干扰
|
|
657
|
-
self.is_running = False
|
|
658
|
-
self.is_paused = False
|
|
659
|
-
|
|
660
|
-
start_time = time.time()
|
|
661
|
-
# 设置停止标志
|
|
662
|
-
self.stop_event.set()
|
|
663
|
-
self.pause_event.clear()
|
|
664
|
-
|
|
665
|
-
# 等待采集线程结束
|
|
666
|
-
if self.capture_thread and self.capture_thread.is_alive():
|
|
667
|
-
try:
|
|
668
|
-
self.capture_thread.join(2) # 等待最长2秒
|
|
669
|
-
except Exception:
|
|
670
|
-
pass
|
|
671
|
-
|
|
672
|
-
# 停止识别器
|
|
673
|
-
if self.recognizer:
|
|
674
|
-
try:
|
|
675
|
-
self.recognizer.stop()
|
|
676
|
-
except Exception:
|
|
677
|
-
pass # 忽略停止时的错误
|
|
678
|
-
self.recognizer = None # 置空识别器,确保下次启动时创建新的
|
|
679
|
-
|
|
680
|
-
# 重置事件
|
|
681
|
-
self.stop_event.clear()
|
|
682
|
-
|
|
683
|
-
elapsed = (time.time() - start_time) * 1000
|
|
684
|
-
if DEBUG_MODE:
|
|
685
|
-
print(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}|停止识别|耗时: {elapsed:.2f}ms|状态: 语音识别已停止")
|
|
686
|
-
else:
|
|
687
|
-
print("语音识别已停止")
|
|
688
|
-
|
|
689
|
-
def pause(self) -> None:
|
|
690
|
-
"""暂停语音识别(关闭连接,而不是保持连接但不发送数据)
|
|
691
|
-
|
|
692
|
-
返回值:
|
|
693
|
-
None
|
|
694
|
-
"""
|
|
695
|
-
with self.lock:
|
|
696
|
-
if not self.is_running:
|
|
697
|
-
if DEBUG_MODE:
|
|
698
|
-
print(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}|暂停识别|状态: 识别未在运行中")
|
|
699
|
-
else:
|
|
700
|
-
print("识别未在运行中,无法暂停")
|
|
701
|
-
return
|
|
702
|
-
|
|
703
|
-
if self.is_paused:
|
|
704
|
-
if DEBUG_MODE:
|
|
705
|
-
print(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}|暂停识别|状态: 识别已处于暂停状态")
|
|
706
|
-
else:
|
|
707
|
-
print("识别已处于暂停状态")
|
|
708
|
-
return
|
|
709
|
-
|
|
710
|
-
start_time = time.time()
|
|
711
|
-
# 停止当前识别器
|
|
712
|
-
if self.recognizer:
|
|
713
|
-
try:
|
|
714
|
-
self.recognizer.stop()
|
|
715
|
-
except Exception:
|
|
716
|
-
pass # 忽略停止时的错误
|
|
717
|
-
|
|
718
|
-
self.pause_event.set()
|
|
719
|
-
with self.lock:
|
|
720
|
-
self.is_paused = True
|
|
721
|
-
|
|
722
|
-
elapsed = (time.time() - start_time) * 1000
|
|
723
|
-
if DEBUG_MODE:
|
|
724
|
-
print(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}|暂停识别|耗时: {elapsed:.2f}ms|状态: 语音识别已暂停")
|
|
725
|
-
else:
|
|
726
|
-
print("语音识别已暂停")
|
|
727
|
-
|
|
728
|
-
def resume(self) -> None:
|
|
729
|
-
"""恢复暂停的语音识别
|
|
730
|
-
|
|
731
|
-
返回值:
|
|
732
|
-
None
|
|
733
|
-
"""
|
|
734
|
-
with self.lock:
|
|
735
|
-
if not self.is_running:
|
|
736
|
-
if DEBUG_MODE:
|
|
737
|
-
print(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}|恢复识别|状态: 识别未在运行中")
|
|
738
|
-
else:
|
|
739
|
-
print("识别未在运行中,无法恢复")
|
|
740
|
-
return
|
|
741
|
-
|
|
742
|
-
if not self.is_paused:
|
|
743
|
-
if DEBUG_MODE:
|
|
744
|
-
print(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}|恢复识别|状态: 识别未处于暂停状态")
|
|
745
|
-
else:
|
|
746
|
-
print("识别未处于暂停状态")
|
|
747
|
-
return
|
|
748
|
-
|
|
749
|
-
start_time = time.time()
|
|
750
|
-
self.pause_event.clear()
|
|
751
|
-
# 恢复时重新创建识别器连接
|
|
752
|
-
self._recreate_recognizer()
|
|
753
|
-
with self.lock:
|
|
754
|
-
self.is_paused = False
|
|
755
|
-
|
|
756
|
-
elapsed = (time.time() - start_time) * 1000
|
|
757
|
-
if DEBUG_MODE:
|
|
758
|
-
print(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}|恢复识别|耗时: {elapsed:.2f}ms|状态: 语音识别已恢复")
|
|
759
|
-
else:
|
|
760
|
-
print("语音识别已恢复")
|
|
761
|
-
|
|
762
|
-
def get_final_result(self) -> str:
    """Return the final recognition result kept by the listener.

    Returns:
        str: The value returned by ``self.listener.get_final_result()``.
    """
    final_text = self.listener.get_final_result()
    return final_text
|
|
769
|
-
|
|
770
|
-
def get_interim_result(self) -> str:
    """Return the current interim (partial) recognition result.

    Returns:
        str: The value returned by ``self.listener.get_interim_result()``.
    """
    partial_text = self.listener.get_interim_result()
    return partial_text
|
|
777
|
-
|
|
778
|
-
def is_recognizing(self) -> bool:
    """Report whether recognition is active, i.e. running and not paused.

    Both flags are read while holding ``self.lock``.

    Returns:
        bool: True when ``is_running`` is set and ``is_paused`` is not.
    """
    with self.lock:
        active = self.is_running and not self.is_paused
    return active
|
|
786
|
-
|
|
787
|
-
def clear_results(self) -> None:
    """Clear the cached results (interim and final) held by the listener.

    Delegates to ``self.listener.clear_results()`` and then prints a
    confirmation message.

    Returns:
        None
    """
    result_store = self.listener
    result_store.clear_results()
    print("结果缓存已清除")
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
def init(appid: str, secret_id: str, secret_key: str, voice_format: int = 8, sample_rate: int = 16000,
         hotword_id: str = "", result_type: int = 0, slice_size: int = 960,
         vad_silence_timeout: int = 10000, vad_pause_timeout: int = 3000,
         vad_mini_volume: int = 1000, vad_mini_length: int = 500, debug: bool = False,
         vad_threshold: float = 0.7,
         sentence_callback=None, engine_model_type: str = "16k_zh",
         interruption_callback=None) -> bool:
    """Initialise the module-level speech recognizer.

    Creates an optional VAD instance through the ``ai_vad`` module and then a
    ``TencentSpeechRecognizer`` that is stored in ``_ai_asr_instance``.

    Args:
        appid (str): Tencent Cloud ASR application ID.
        secret_id (str): Tencent Cloud API secret ID.
        secret_key (str): Tencent Cloud API secret key.
        voice_format (int): Audio format, default 8 (PCM). Accepted for
            interface compatibility; not forwarded by this function.
        sample_rate (int): Sample rate, default 16000. Not forwarded by this
            function.
        hotword_id (str): Hot-word ID, default empty. Not forwarded by this
            function.
        result_type (int): Result type, default 0 (final results only). Not
            forwarded by this function.
        slice_size (int): Audio slice size, default 960. Not forwarded by
            this function.
        vad_silence_timeout (int): Silence timeout in milliseconds, default 10000.
        vad_pause_timeout (int): Pause timeout in milliseconds, default 3000.
        vad_mini_volume (int): Minimum volume threshold, default 1000.
        vad_mini_length (int): Minimum speech length in milliseconds, default 500.
        debug (bool): When True, switches the module-level ``DEBUG_MODE`` flag
            on. False leaves the flag as it is.
        vad_threshold (float): VAD threshold, default 0.7.
        sentence_callback (callable): Callback passed to the recognizer for
            completed sentences.
        engine_model_type (str): Engine model type, default "16k_zh".
        interruption_callback (callable): Interruption callback passed to the
            recognizer.

    Returns:
        bool: True on success, False when any step raised an exception (the
        error and its traceback are printed).
    """
    global _ai_asr_instance, ai_vad, DEBUG_MODE

    # Honour the documented ``debug`` flag. Only ever enables debug output so
    # a flag that was already switched on elsewhere is not silently reset.
    if debug:
        DEBUG_MODE = True

    try:
        # Build the VAD instance; recognition proceeds without one on failure.
        vad = None
        if ai_vad is None:
            # Lazy import: the module is only loaded the first time it is needed.
            try:
                from . import ai_vad
                print("成功导入ai_vad模块")
            except ImportError:
                print("警告: ai_vad模块未找到")
                ai_vad = None

        if ai_vad is not None:
            try:
                vad = ai_vad.VADProviderBase.create_vad_instance(
                    vad_mini_volume, vad_mini_length,
                    vad_pause_timeout, vad_silence_timeout,
                    vad_threshold
                )
                print("VAD初始化成功,使用EnergyVADProvider")
            except Exception as e:
                # Best effort: a broken VAD must not prevent recognition.
                print(f"VAD实例创建失败: {e}")
                vad = None

        _ai_asr_instance = TencentSpeechRecognizer(
            appid=appid,
            secret_id=secret_id,
            secret_key=secret_key,
            engine_model_type=engine_model_type,
            sentence_callback=sentence_callback,
            vad=vad,
            interruption_callback=interruption_callback
        )
        print("语音识别初始化成功")
        return True
    except Exception as e:
        print(f"语音识别初始化失败: {e}")
        import traceback
        traceback.print_exc()
        return False
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
def start() -> None:
    """Start speech recognition on the module-level recognizer.

    Prints a hint and does nothing else when ``_ai_asr_instance`` is None.

    Returns:
        None
    """
    recognizer = _ai_asr_instance
    if recognizer is not None:
        recognizer.start()
        return
    print("语音识别未初始化,请先调用 init() 函数")
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
def stop() -> None:
    """Stop speech recognition on the module-level recognizer.

    Prints a notice and does nothing else when ``_ai_asr_instance`` is None.

    Returns:
        None
    """
    recognizer = _ai_asr_instance
    if recognizer is not None:
        recognizer.stop()
        return
    print("语音识别未初始化")
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
def pause() -> None:
    """Pause speech recognition on the module-level recognizer.

    Prints a notice and does nothing else when ``_ai_asr_instance`` is None.

    Returns:
        None
    """
    recognizer = _ai_asr_instance
    if recognizer is not None:
        recognizer.pause()
        return
    print("语音识别未初始化")
|
|
909
|
-
|
|
910
|
-
|
|
911
|
-
def resume() -> None:
    """Resume speech recognition on the module-level recognizer.

    Prints a notice and does nothing else when ``_ai_asr_instance`` is None.

    Returns:
        None
    """
    recognizer = _ai_asr_instance
    if recognizer is not None:
        recognizer.resume()
        return
    print("语音识别未初始化")
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
def get_final_result() -> str:
    """Return the final recognition result from the module-level recognizer.

    Returns:
        str: The recognizer's final result, or an empty string (after
        printing a notice) when ``_ai_asr_instance`` is None.
    """
    recognizer = _ai_asr_instance
    if recognizer is not None:
        return recognizer.get_final_result()
    print("语音识别未初始化")
    return ""
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
def get_interim_result() -> str:
    """Return the current interim result from the module-level recognizer.

    Returns:
        str: The recognizer's interim result, or an empty string (after
        printing a notice) when ``_ai_asr_instance`` is None.
    """
    recognizer = _ai_asr_instance
    if recognizer is not None:
        return recognizer.get_interim_result()
    print("语音识别未初始化")
    return ""
|
|
948
|
-
|
|
949
|
-
|
|
950
|
-
def is_recognizing() -> bool:
    """Report whether the module-level recognizer is running and not paused.

    Returns:
        bool: The recognizer's ``is_recognizing()`` value, or False (after
        printing a notice) when ``_ai_asr_instance`` is None.
    """
    recognizer = _ai_asr_instance
    if recognizer is not None:
        return recognizer.is_recognizing()
    print("语音识别未初始化")
    return False
|
|
961
|
-
|
|
962
|
-
|
|
963
|
-
def clear_results() -> None:
    """Clear cached interim and final results on the module-level recognizer.

    Prints a notice and does nothing else when ``_ai_asr_instance`` is None.

    Returns:
        None
    """
    recognizer = _ai_asr_instance
    if recognizer is not None:
        recognizer.clear_results()
        return
    print("语音识别未初始化")
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
def start_asr_time(seconds: int) -> None:
    """Start speech recognition and stop it automatically after a delay.

    Recognition is started immediately via ``start()``. A daemon thread then
    announces the delay, sleeps for ``seconds`` and calls ``stop()``. Prints a
    hint and does nothing else when ``_ai_asr_instance`` is None.

    Args:
        seconds (int): How long recognition should run, in seconds.

    Returns:
        None
    """
    if _ai_asr_instance is None:
        print("语音识别未初始化,请先调用 init() 函数")
        return

    # Kick off recognition right away.
    start()

    def _auto_stop():
        # Runs in the background thread: wait out the delay, then stop.
        print(f"语音识别将在 {seconds} 秒后自动停止")
        time.sleep(seconds)
        stop()

    # Daemon thread so a pending auto-stop never keeps the process alive.
    threading.Thread(target=_auto_stop, daemon=True).start()
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
# Minimal manual test harness, active only when this file is run as a script.
if __name__ == "__main__":
    import sys

    # Three positional arguments (the credentials) are required.
    if len(sys.argv) < 4:
        print("用法: python ai_asr.py <appid> <secret_id> <secret_key>")
        sys.exit(1)

    appid, secret_id, secret_key = sys.argv[1:4]

    # Initialise speech recognition; bail out if that fails.
    initialised = init(appid, secret_id, secret_key)
    if not initialised:
        sys.exit(1)

    # Begin recognising.
    start()

    try:
        # Poll once per second and echo whatever results are available.
        while True:
            time.sleep(1)

            interim = get_interim_result()
            if interim:
                print(f"中间结果: {interim}")

            final = get_final_result()
            if final:
                print(f"最终结果: {final}")
                clear_results()
    except KeyboardInterrupt:
        # Ctrl+C ends the session cleanly.
        print("\n停止语音识别...")
        stop()
        sys.exit(0)
|