smartpi 0.1.40__py3-none-any.whl → 0.1.42__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- smartpi/__init__.py +1 -1
- smartpi/ai_asr.py +1037 -0
- smartpi/ai_llm.py +934 -0
- smartpi/ai_tts.py +938 -0
- smartpi/ai_vad.py +83 -0
- smartpi/base_driver.py +265 -11
- smartpi/local_model.py +432 -0
- smartpi/mcp_client.py +100 -0
- smartpi/mcp_fastmcp.py +322 -0
- smartpi/mcp_intent_recognizer.py +408 -0
- smartpi/models/__init__.py +0 -0
- smartpi/models/snakers4_silero-vad/__init__.py +0 -0
- smartpi/models/snakers4_silero-vad/hubconf.py +56 -0
- smartpi/models/snakers4_silero-vad/src/silero_vad/data/silero_vad.jit +0 -0
- smartpi/models/snakers4_silero-vad/src/silero_vad/data/silero_vad.onnx +0 -0
- smartpi/models/snakers4_silero-vad/src/silero_vad/data/silero_vad_16k_op15.onnx +0 -0
- smartpi/models/snakers4_silero-vad/src/silero_vad/data/silero_vad_half.onnx +0 -0
- smartpi/onnx_pose_workflow.py +1 -1
- smartpi/rknn_pose_workflow.py +1 -1
- smartpi/tencentcloud-speech-sdk-python/__init__.py +1 -0
- smartpi/tencentcloud-speech-sdk-python/asr/__init__.py +0 -0
- smartpi/tencentcloud-speech-sdk-python/asr/flash_recognizer.py +178 -0
- smartpi/tencentcloud-speech-sdk-python/asr/speech_recognizer.py +311 -0
- smartpi/tencentcloud-speech-sdk-python/common/__init__.py +1 -0
- smartpi/tencentcloud-speech-sdk-python/common/credential.py +6 -0
- smartpi/tencentcloud-speech-sdk-python/common/log.py +16 -0
- smartpi/tencentcloud-speech-sdk-python/common/utils.py +7 -0
- smartpi/tencentcloud-speech-sdk-python/examples/tts/tts_text.txt +60 -0
- smartpi/tencentcloud-speech-sdk-python/soe/__init__.py +0 -0
- smartpi/tencentcloud-speech-sdk-python/soe/speaking_assessment.py +276 -0
- smartpi/tencentcloud-speech-sdk-python/tts/__init__.py +0 -0
- smartpi/tencentcloud-speech-sdk-python/tts/flowing_speech_synthesizer.py +294 -0
- smartpi/tencentcloud-speech-sdk-python/tts/speech_synthesizer.py +144 -0
- smartpi/tencentcloud-speech-sdk-python/tts/speech_synthesizer_ws.py +234 -0
- smartpi/tencentcloud-speech-sdk-python/vc/__init__.py +0 -0
- smartpi/tencentcloud-speech-sdk-python/vc/speech_convertor_ws.py +237 -0
- {smartpi-0.1.40.dist-info → smartpi-0.1.42.dist-info}/METADATA +1 -1
- smartpi-0.1.42.dist-info/RECORD +76 -0
- smartpi-0.1.40.dist-info/RECORD +0 -44
- {smartpi-0.1.40.dist-info → smartpi-0.1.42.dist-info}/WHEEL +0 -0
- {smartpi-0.1.40.dist-info → smartpi-0.1.42.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,311 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
import sys
|
|
3
|
+
import hmac
|
|
4
|
+
import hashlib
|
|
5
|
+
import base64
|
|
6
|
+
import time
|
|
7
|
+
import json
|
|
8
|
+
import threading
|
|
9
|
+
import websocket
|
|
10
|
+
import uuid
|
|
11
|
+
import urllib
|
|
12
|
+
from common.log import logger
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def is_python3():
    """Return True when the running interpreter is Python 3 or newer.

    The major version number is compared numerically; comparing the
    ``sys.version`` string lexicographically would misreport any major
    version with two digits (``'10.0' > '3'`` is False).
    """
    return sys.version_info[0] >= 3
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
#实时识别语音使用
|
|
22
|
+
class SpeechRecognitionListener():
    '''
    Callback interface for real-time speech recognition events.

    Every hook is a no-op; subclass and override the ones you need.

    response:
    on_recognition_start returns only the voice_id field.
    on_fail has only the voice_id, code and message fields.
    on_recognition_complete has no result field.
    All other messages contain every field.
    Field name    Type
    code          Integer
    message       String
    voice_id      String
    message_id    String
    result        Result
    final         Integer

    Structure of Result:
    slice_type    Integer
    index         Integer
    start_time    Integer
    end_time      Integer
    voice_text_str String
    word_size     Integer
    word_list     Word Array

    Structure of Word:
    word          String
    start_time    Integer
    end_time      Integer
    stable_flag   Integer
    '''

    def on_recognition_start(self, response):
        """Invoked by SpeechRecognizer.start() with a dict holding voice_id."""

    def on_sentence_begin(self, response):
        """Invoked for a message whose result.slice_type is 0."""

    def on_recognition_result_change(self, response):
        """Invoked for a message whose result.slice_type is 1."""

    def on_sentence_end(self, response):
        """Invoked for a message whose result.slice_type is 2."""

    def on_recognition_complete(self, response):
        """Invoked for the message whose final field equals 1."""

    def on_fail(self, response):
        """Invoked for a message whose code field is non-zero."""
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
# Connection lifecycle states stored in SpeechRecognizer.status.
NOTOPEN = 0  # initial value assigned in __init__
STARTED = 1  # assigned by start(); write() sleeps while the status has this value
OPENED = 2   # assigned by the websocket on_open callback; write()/stop() send only in this state
FINAL = 3    # assigned when a message with final == 1 arrives
ERROR = 4    # assigned by the websocket on_error callback (unless already FINAL)
CLOSED = 5   # assigned by the websocket on_close callback
|
|
78
|
+
|
|
79
|
+
#实时识别语音使用
|
|
80
|
+
class SpeechRecognizer:
    """Streaming speech-recognition client for asr.cloud.tencent.com.

    Typical use: construct, adjust options with the ``set_*`` methods,
    call :meth:`start`, feed audio with :meth:`write`, then call
    :meth:`stop`. Recognition events are delivered to ``listener`` from the
    websocket worker thread.

    Args:
        appid: application id; becomes the last path segment of the URL.
        credential: object exposing ``secret_id`` and ``secret_key``.
        engine_model_type: value sent as ``engine_model_type``.
        listener: object providing the SpeechRecognitionListener callbacks.
    """

    def __init__(self, appid, credential, engine_model_type, listener):
        self.result = ""
        self.credential = credential
        self.appid = appid
        self.engine_model_type = engine_model_type
        self.status = NOTOPEN
        self.ws = None
        self.wst = None
        self.voice_id = ""
        self.new_start = 0
        self.listener = listener
        self.filter_dirty = 0
        self.filter_modal = 0
        self.filter_punc = 0
        self.convert_num_mode = 0
        self.word_info = 0
        self.need_vad = 0
        self.vad_silence_time = 0
        self.hotword_id = ""
        self.hotword_list = ""
        self.reinforce_hotword = 0
        self.noise_threshold = 0
        self.voice_format = 4
        self.nonce = ""
        self.replace_text_id = ""

    def set_filter_dirty(self, filter_dirty):
        """Set the value sent as the ``filter_dirty`` query parameter."""
        self.filter_dirty = filter_dirty

    def set_filter_modal(self, filter_modal):
        """Set the value sent as the ``filter_modal`` query parameter."""
        self.filter_modal = filter_modal

    def set_filter_punc(self, filter_punc):
        """Set the value sent as the ``filter_punc`` query parameter."""
        self.filter_punc = filter_punc

    def set_convert_num_mode(self, convert_num_mode):
        """Set the value sent as the ``convert_num_mode`` query parameter."""
        self.convert_num_mode = convert_num_mode

    def set_word_info(self, word_info):
        """Set the value sent as the ``word_info`` query parameter."""
        self.word_info = word_info

    def set_need_vad(self, need_vad):
        """Set the value sent as the ``needvad`` query parameter."""
        self.need_vad = need_vad

    def set_vad_silence_time(self, vad_silence_time):
        """Set ``vad_silence_time``; it is only sent when non-zero."""
        self.vad_silence_time = vad_silence_time

    def set_hotword_id(self, hotword_id):
        """Set ``hotword_id``; it is only sent when non-empty."""
        self.hotword_id = hotword_id

    def set_hotword_list(self, hotword_list):
        """Set ``hotword_list``; it is only sent when non-empty."""
        self.hotword_list = hotword_list

    def set_voice_format(self, voice_format):
        """Set the value sent as the ``voice_format`` query parameter."""
        self.voice_format = voice_format

    def set_nonce(self, nonce):
        """Set ``nonce``; when left empty the request timestamp is used."""
        self.nonce = nonce

    def set_reinforce_hotword(self, reinforce_hotword):
        """Set the value sent as the ``reinforce_hotword`` query parameter."""
        self.reinforce_hotword = reinforce_hotword

    def set_noise_threshold(self, noise_threshold):
        """Set the value sent as the ``noise_threshold`` query parameter."""
        self.noise_threshold = noise_threshold

    def set_replace_text_id(self, replace_text_id):
        """Set ``replace_text_id``; it is only sent when non-empty."""
        self.replace_text_id = replace_text_id

    def _build_request(self, prefix, param):
        """Return ``prefix + appid [+ '?' + 'k=v&k=v...']`` for *param*.

        *param* is an iterable of ``(key, value)`` pairs. The pair whose KEY
        is ``'appid'`` supplies the path segment and is left out of the query
        part; every other pair is emitted in the given order. Matching on
        the key (not on tuple membership) keeps a parameter whose value
        happens to be the string ``"appid"`` in the request.
        """
        appid = ""
        appid_seen = False
        pairs = []
        for key, value in param:
            if key == 'appid':
                if not appid_seen:
                    appid = str(value)
                    appid_seen = True
                continue
            pairs.append(str(key) + "=" + str(value))
        request = prefix + appid
        if pairs:
            request += "?" + "&".join(pairs)
        return request

    def format_sign_string(self, param):
        """Build the string that is signed (host + path + query, no scheme)."""
        return self._build_request("asr.cloud.tencent.com/asr/v2/", param)

    def create_query_string(self, param):
        """Build the ``wss://`` request URL (without the signature)."""
        return self._build_request("wss://asr.cloud.tencent.com/asr/v2/", param)

    def sign(self, signstr, secret_key):
        """Return the base64 text of HMAC-SHA1(secret_key, signstr)."""
        digest = hmac.new(secret_key.encode('utf-8'),
                          signstr.encode('utf-8'), hashlib.sha1).digest()
        return base64.b64encode(digest).decode('utf-8')

    def create_query_arr(self):
        """Collect the request parameters into a dict.

        Optional parameters (``vad_silence_time``, ``hotword_id``,
        ``hotword_list``, ``replace_text_id``) are included only when they
        differ from their empty default. ``expired`` is the current time
        plus 24 hours.
        """
        query_arr = dict()

        query_arr['appid'] = self.appid
        query_arr['sub_service_type'] = 1
        query_arr['engine_model_type'] = self.engine_model_type
        query_arr['filter_dirty'] = self.filter_dirty
        query_arr['filter_modal'] = self.filter_modal
        query_arr['filter_punc'] = self.filter_punc
        query_arr['needvad'] = self.need_vad
        query_arr['convert_num_mode'] = self.convert_num_mode
        query_arr['word_info'] = self.word_info
        if self.vad_silence_time != 0:
            query_arr['vad_silence_time'] = self.vad_silence_time
        if self.hotword_id != "":
            query_arr['hotword_id'] = self.hotword_id
        if self.hotword_list != "":
            query_arr['hotword_list'] = self.hotword_list
        if self.replace_text_id != "":
            query_arr['replace_text_id'] = self.replace_text_id
        query_arr['secretid'] = self.credential.secret_id
        query_arr['voice_format'] = self.voice_format
        query_arr['voice_id'] = self.voice_id
        query_arr['timestamp'] = str(int(time.time()))
        if self.nonce != "":
            query_arr['nonce'] = self.nonce
        else:
            query_arr['nonce'] = query_arr['timestamp']
        query_arr['expired'] = int(time.time()) + 24 * 60 * 60
        query_arr['reinforce_hotword'] = self.reinforce_hotword
        query_arr['noise_threshold'] = self.noise_threshold
        return query_arr

    def stop(self):
        """Send the ``end`` message, wait for the worker thread, then close.

        Safe to call before :meth:`start`: nothing happens when no
        websocket has been created.
        """
        if self.status == OPENED:
            msg = {}
            msg['type'] = "end"
            text_str = json.dumps(msg)
            self.ws.sock.send(text_str)
        if self.ws:
            # Wait for run_forever() to return before closing the app.
            if self.wst and self.wst.is_alive():
                self.wst.join()
            self.ws.close()

    def write(self, data):
        """Send one binary audio chunk.

        Blocks (polling every 0.1 s) while the connection is still being
        opened; the chunk is dropped when the status is anything other
        than OPENED afterwards.
        """
        while self.status == STARTED:
            time.sleep(0.1)
        if self.status == OPENED:
            self.ws.sock.send_binary(data)

    def start(self):
        """Open the websocket on a daemon thread and notify the listener."""

        def on_message(ws, message):
            response = json.loads(message)
            response['voice_id'] = self.voice_id
            if response['code'] != 0:
                logger.error("%s server recognition fail %s" %
                             (response['voice_id'], response['message']))
                self.listener.on_fail(response)
                return
            if "final" in response and response["final"] == 1:
                self.status = FINAL
                self.result = message
                self.listener.on_recognition_complete(response)
                logger.info("%s recognition complete" % response['voice_id'])
                return
            if "result" in response.keys():
                if response["result"]['slice_type'] == 0:
                    self.listener.on_sentence_begin(response)
                    return
                elif response["result"]["slice_type"] == 2:
                    self.listener.on_sentence_end(response)
                    return
                elif response["result"]["slice_type"] == 1:
                    self.listener.on_recognition_result_change(response)
                    return

        def on_error(ws, error):
            # Errors raised while tearing down a finished session are ignored.
            if self.status == FINAL:
                return
            logger.error("websocket error %s voice id %s" %
                         (format(error), self.voice_id))
            self.status = ERROR

        def on_close(ws, *close_args):
            # websocket-client >= 1.0 also passes (status_code, message);
            # older releases pass only ws. Accept both.
            self.status = CLOSED
            logger.info("websocket closed voice id %s" %
                        self.voice_id)

        def on_open(ws):
            self.status = OPENED

        query_arr = self.create_query_arr()
        if self.voice_id == "":
            query_arr['voice_id'] = str(uuid.uuid1())
            self.voice_id = query_arr['voice_id']
        query = sorted(query_arr.items(), key=lambda d: d[0])
        signstr = self.format_sign_string(query)

        autho = self.sign(signstr, self.credential.secret_key)
        requrl = self.create_query_string(query)
        # Import the submodule explicitly: a bare "import urllib" does not
        # guarantee that urllib.parse is loaded.
        try:
            from urllib.parse import quote as _quote
        except ImportError:  # Python 2
            from urllib import quote as _quote
        autho = _quote(autho)
        requrl += "&signature=%s" % autho
        self.ws = websocket.WebSocketApp(requrl, None,
                                         on_error=on_error, on_close=on_close,
                                         on_message=on_message)
        self.ws.on_open = on_open
        self.wst = threading.Thread(target=self.ws.run_forever)
        self.wst.daemon = True
        # Publish STARTED before the thread runs so a fast on_open/on_error
        # cannot have its status overwritten by this assignment.
        self.status = STARTED
        self.wst.start()
        response = {}
        response['voice_id'] = self.voice_id
        self.listener.on_recognition_start(response)
        logger.info("%s recognition start" % response['voice_id'])
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import logging.handlers
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
# Record layout shared by the root logger configuration and the file handler below.
FORMAT = '%(asctime)15s %(name)s-%(levelname)s %(funcName)s:%(lineno)s %(message)s'
# NOTE(review): this configures the process-wide root logger at DEBUG as an
# import-time side effect of this module — confirm that is intended for a library.
logging.basicConfig(level=logging.DEBUG, format=FORMAT)
# Logger shared by the SDK modules (they do `from common.log import logger`).
logger = logging.getLogger('tencent_speech.log')

# Rotating log file at the relative path 'tencent_speech.log':
# rolls over at 1 MiB and keeps up to 5 backups.
handler = logging.handlers.RotatingFileHandler('tencent_speech.log', maxBytes=1024 * 1024,
                                               backupCount=5, encoding='utf-8')
handler.setLevel(logging.DEBUG)
handler.setFormatter(logging.Formatter(FORMAT))
logger.addHandler(handler)
# The logger level is INFO, so DEBUG records are discarded before they reach
# the handler even though the handler itself accepts DEBUG.
logger.setLevel('INFO')
|
|
16
|
+
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
床前明月光,疑是地上霜。
|
|
2
|
+
举头望明月,低头思故乡。
|
|
3
|
+
红豆生南国,春来发几枝。
|
|
4
|
+
君自故乡来,应知故乡事。
|
|
5
|
+
春眠不觉晓,处处闻啼鸟。
|
|
6
|
+
欲穷千里目,更上一层楼。
|
|
7
|
+
黄河远上白云间,一片孤城万仞山。
|
|
8
|
+
两岸猿声啼不住,轻舟已过万重山。
|
|
9
|
+
独在异乡为异客,每逢佳节倍思亲。
|
|
10
|
+
春城无处不飞花,寒食东风御柳斜。
|
|
11
|
+
月落乌啼霜满天,江枫渔火对愁眠。
|
|
12
|
+
春潮带雨晚来急,野渡无人舟自横。
|
|
13
|
+
洛阳亲友如相问,一片冰心在玉壶。
|
|
14
|
+
空山新雨后,天气晚来秋。
|
|
15
|
+
千里莺啼绿映红,水村山郭酒旗风。
|
|
16
|
+
朝辞白帝彩云间,千里江陵一日还。
|
|
17
|
+
会当凌绝顶,一览众山小。
|
|
18
|
+
野旷天低树,江清月近人。
|
|
19
|
+
国破山河在,城春草木深。
|
|
20
|
+
感时花溅泪,恨别鸟惊心。
|
|
21
|
+
白日依山尽,黄河入海流。
|
|
22
|
+
红豆不堪看,满眼相思泪。
|
|
23
|
+
独怜幽草涧边生,上有黄鹂深树鸣。
|
|
24
|
+
云想衣裳花想容,春风拂槛露华浓。
|
|
25
|
+
夜来风雨声,花落知多少。
|
|
26
|
+
江水流春去欲尽,江潭落月复西斜。
|
|
27
|
+
独在异乡为异客,每逢佳节倍思亲。
|
|
28
|
+
春江潮水连海平,海上明月共潮生。
|
|
29
|
+
此夜曲中闻折柳,何人不起故园情。
|
|
30
|
+
无边落木萧萧下,不尽长江滚滚来。
|
|
31
|
+
春心莫共花争发,一寸相思一寸灰。
|
|
32
|
+
月出惊山鸟,时鸣春涧中。
|
|
33
|
+
江畔何人初见月,江月何年初照人。
|
|
34
|
+
谁家玉笛暗飞声,散入春风满洛城。
|
|
35
|
+
草长莺飞二月天,拂堤杨柳醉春烟。
|
|
36
|
+
春风又绿江南岸,明月何时照我还。
|
|
37
|
+
露从今夜白,月是故乡明。
|
|
38
|
+
君问归期未有期,巴山夜雨涨秋池。
|
|
39
|
+
秋水共长天一色,落霞与孤鹜齐飞。
|
|
40
|
+
月黑雁飞高,单于夜遁逃。
|
|
41
|
+
风急天高猿啸哀,渚清沙白鸟飞回。
|
|
42
|
+
星垂平野阔,月涌大江流。
|
|
43
|
+
秦时明月汉时关,万里长征人未还。
|
|
44
|
+
沧海月明珠有泪,蓝田日暖玉生烟。
|
|
45
|
+
独上江楼思渺然,月光如水水如天。
|
|
46
|
+
相看两不厌,只有敬亭山。
|
|
47
|
+
桃花流水窅然去,别有天地非人间。
|
|
48
|
+
黄鹤一去不复返,白云千载空悠悠。
|
|
49
|
+
朱雀桥边野草花,乌衣巷口夕阳斜。
|
|
50
|
+
绿蚁新醅酒,红泥小火炉。
|
|
51
|
+
姑苏城外寒山寺,夜半钟声到客船。
|
|
52
|
+
银烛秋光冷画屏,轻罗小扇扑流萤。
|
|
53
|
+
碧玉妆成一树高,万条垂下绿丝绦。
|
|
54
|
+
渭城朝雨浥轻尘,客舍青青柳色新。
|
|
55
|
+
忽如一夜春风来,千树万树梨花开。
|
|
56
|
+
故人西辞黄鹤楼,烟花三月下扬州。
|
|
57
|
+
洛阳城里春光好,洛阳才子他乡老。
|
|
58
|
+
江南好,风景旧曾谙。
|
|
59
|
+
江南可采莲,莲叶何田田。
|
|
60
|
+
采得百花成蜜后,为谁辛苦为谁甜。
|
|
File without changes
|
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
import sys
|
|
3
|
+
import hmac
|
|
4
|
+
import hashlib
|
|
5
|
+
import base64
|
|
6
|
+
import time
|
|
7
|
+
import json
|
|
8
|
+
import threading
|
|
9
|
+
import urllib
|
|
10
|
+
|
|
11
|
+
import websocket
|
|
12
|
+
import uuid
|
|
13
|
+
from urllib.parse import quote
|
|
14
|
+
from common.log import logger
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def is_python3():
    """Return True when the running interpreter is Python 3 or newer.

    The major version number is compared numerically; comparing the
    ``sys.version`` string lexicographically would misreport any major
    version with two digits (``'10.0' > '3'`` is False).
    """
    return sys.version_info[0] >= 3
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# 实时识别语音使用
|
|
24
|
+
class SpeakingAssessmentListener():
    '''
    Callback interface for speaking-assessment events.

    Every hook is a no-op; subclass and override the ones you need.

    response:
    on_recognition_start returns only the voice_id field.
    on_fail has only the voice_id, code and message fields.
    on_recognition_complete has no result field.
    All other messages contain every field.
    Field name    Type
    code          Integer
    message       String
    voice_id      String
    message_id    String
    result
    final         Integer

    (The structure notes below were left commented out by the original author.)
    # Structure of Result:
    # slice_type    Integer
    # index         Integer
    # start_time    Integer
    # end_time      Integer
    # voice_text_str String
    # word_size     Integer
    # word_list     Word Array
    #
    # Structure of Word:
    # word          String
    # start_time    Integer
    # end_time      Integer
    # stable_flag   Integer
    '''

    def on_recognition_start(self, response):
        """Invoked by SpeakingAssessment.start() with a dict holding voice_id."""

    def on_intermediate_result(self, response):
        """Invoked for a non-final message whose result is not None."""

    def on_recognition_complete(self, response):
        """Invoked for the message whose final field equals 1."""

    def on_fail(self, response):
        """Invoked for a message whose code field is non-zero."""
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
# Connection lifecycle states stored in SpeakingAssessment.status.
NOTOPEN = 0  # initial value assigned in __init__
STARTED = 1  # assigned by start(); write() sleeps while the status has this value
OPENED = 2   # assigned by the websocket on_open callback; write()/stop() send only in this state
FINAL = 3    # assigned when a message with final == 1 arrives
ERROR = 4    # assigned by the websocket on_error callback (unless already FINAL)
CLOSED = 5   # assigned by the websocket on_close callback
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def quote_autho(autho):
    """Percent-encode *autho* for use inside a URL.

    Uses ``urllib.parse.quote`` on Python 3 and ``urllib.quote`` on
    Python 2, both with their default safe characters.
    """
    if sys.version_info < (3, 0):
        return urllib.quote(autho)
    from urllib.parse import quote as _py3_quote
    return _py3_quote(autho)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
# 实时识别使用
|
|
85
|
+
class SpeakingAssessment:
    """Streaming speaking-assessment client for soe.cloud.tencent.com.

    Typical use: construct, adjust options with the ``set_*`` methods,
    call :meth:`start`, feed audio with :meth:`write`, then call
    :meth:`stop`. Events are delivered to ``listener`` from the websocket
    worker thread.

    Args:
        appid: application id; becomes the last path segment of the URL.
        credential: object exposing ``secret_id``, ``secret_key`` and ``token``.
        engine_model_type: value sent as ``server_engine_type``.
        listener: object providing the SpeakingAssessmentListener callbacks.
    """

    def __init__(self, appid, credential, engine_model_type, listener):
        self.result = ""
        self.credential = credential
        self.appid = appid
        self.server_engine_type = engine_model_type
        self.status = NOTOPEN
        self.ws = None
        self.wst = None
        self.voice_id = ""
        self.new_start = 0
        self.listener = listener
        self.text_mode = 0
        self.ref_text = ""
        self.keyword = ""
        self.eval_mode = 0
        self.score_coeff = 1.0
        self.sentence_info_enabled = 0
        self.voice_format = 0
        self.nonce = ""
        self.rec_mode = 0

    def set_text_mode(self, text_mode):
        """Set the value sent as the ``text_mode`` query parameter."""
        self.text_mode = text_mode

    def set_rec_mode(self, rec_mode):
        """Set the value sent as the ``rec_mode`` query parameter."""
        self.rec_mode = rec_mode

    def set_ref_text(self, ref_text):
        """Set the value sent as the ``ref_text`` query parameter."""
        self.ref_text = ref_text

    def set_keyword(self, keyword):
        """Set the value sent as the ``keyword`` query parameter."""
        self.keyword = keyword

    def set_eval_mode(self, eval_mode):
        """Set the value sent as the ``eval_mode`` query parameter."""
        self.eval_mode = eval_mode

    def set_sentence_info_enabled(self, sentence_info_enabled):
        """Set the value sent as the ``sentence_info_enabled`` query parameter."""
        self.sentence_info_enabled = sentence_info_enabled

    def set_voice_format(self, voice_format):
        """Set the value sent as the ``voice_format`` query parameter."""
        self.voice_format = voice_format

    def set_nonce(self, nonce):
        """Set ``nonce``; when left empty the request timestamp is used."""
        self.nonce = nonce

    def format_sign_string(self, param):
        """Build the string that is signed (host + path + query, no scheme).

        *param* is an iterable of ``(key, value)`` pairs. The pair whose KEY
        is ``'appid'`` supplies the path segment and is left out of the query
        part; values are NOT percent-encoded here. Matching on the key (not
        on tuple membership) keeps a parameter such as ``ref_text`` whose
        value happens to be the string ``"appid"`` in the signature.
        """
        appid = ""
        appid_seen = False
        pairs = []
        for key, value in param:
            if key == 'appid':
                if not appid_seen:
                    appid = str(value)
                    appid_seen = True
                continue
            pairs.append(str(key) + "=" + str(value))
        signstr = "soe.cloud.tencent.com/soe/api/" + appid
        if pairs:
            signstr += "?" + "&".join(pairs)
        return signstr

    def create_query_string(self, param):
        """Build the ``wss://`` request URL (without the signature).

        *param* is the dict returned by :meth:`create_query_arr`; every
        value except ``appid`` is percent-encoded with ``quote_autho``.
        """
        appid = ""
        pairs = []
        for key, value in param.items():
            if key == 'appid':
                appid = str(value)
                continue
            pairs.append(str(key) + "=" + str(quote_autho(str(value))))
        query = appid
        if pairs:
            query += "?" + "&".join(pairs)
        return "wss://soe.cloud.tencent.com/soe/api/" + query

    def sign(self, signstr, secret_key):
        """Return the base64 text of HMAC-SHA1(secret_key, signstr)."""
        digest = hmac.new(secret_key.encode('utf-8'),
                          signstr.encode('utf-8'), hashlib.sha1).digest()
        return base64.b64encode(digest).decode('utf-8')

    def create_query_arr(self):
        """Collect the request parameters into a dict.

        ``token`` is included only when the credential carries a non-empty
        one. ``expired`` is the current time plus 24 hours.
        """
        query_arr = dict()

        query_arr['appid'] = self.appid
        query_arr['server_engine_type'] = self.server_engine_type
        query_arr['text_mode'] = self.text_mode
        query_arr['rec_mode'] = self.rec_mode
        query_arr['ref_text'] = self.ref_text
        query_arr['keyword'] = self.keyword
        query_arr['eval_mode'] = self.eval_mode
        query_arr['score_coeff'] = self.score_coeff
        query_arr['sentence_info_enabled'] = self.sentence_info_enabled
        query_arr['secretid'] = self.credential.secret_id
        if self.credential.token != "":
            query_arr['token'] = self.credential.token
        query_arr['voice_format'] = self.voice_format
        query_arr['voice_id'] = self.voice_id
        query_arr['timestamp'] = str(int(time.time()))
        if self.nonce != "":
            query_arr['nonce'] = self.nonce
        else:
            query_arr['nonce'] = query_arr['timestamp']
        query_arr['expired'] = int(time.time()) + 24 * 60 * 60
        return query_arr

    def stop(self):
        """Send the ``end`` message, wait for the worker thread, then close.

        Safe to call before :meth:`start`: nothing happens when no
        websocket has been created.
        """
        if self.status == OPENED:
            msg = {'type': "end"}
            text_str = json.dumps(msg)
            self.ws.sock.send(text_str)
        if self.ws:
            # Wait for run_forever() to return before closing the app.
            if self.wst and self.wst.is_alive():
                self.wst.join()
            self.ws.close()

    def write(self, data):
        """Send one binary audio chunk.

        Blocks (polling every 0.1 s) while the connection is still being
        opened; the chunk is dropped when the status is anything other
        than OPENED afterwards.
        """
        while self.status == STARTED:
            time.sleep(0.1)
        if self.status == OPENED:
            self.ws.sock.send_binary(data)

    def start(self):
        """Open the websocket on a daemon thread and notify the listener."""

        def on_message(ws, message):
            response = json.loads(message)
            response['voice_id'] = self.voice_id
            if response['code'] != 0:
                logger.error("%s server recognition fail %s" %
                             (response['voice_id'], response['message']))
                self.listener.on_fail(response)
                return
            if "final" in response and response["final"] == 1:
                self.status = FINAL
                self.result = message
                self.listener.on_recognition_complete(response)
                logger.info("%s recognition complete" % response['voice_id'])
                self.ws.close()
                return
            # A message without a "result" field is ignored instead of
            # raising KeyError inside the websocket callback.
            if response.get("result") is not None:
                self.listener.on_intermediate_result(response)
                logger.info("%s recognition doing" % response['voice_id'])
                return

        def on_error(ws, error):
            # Errors raised while tearing down a finished session are ignored.
            if self.status == FINAL:
                return
            logger.error("websocket error %s voice id %s" %
                         (format(error), self.voice_id))
            self.status = ERROR

        def on_close(ws, *close_args):
            # websocket-client >= 1.0 also passes (status_code, message);
            # older releases pass only ws. Accept both.
            self.status = CLOSED
            logger.info("websocket closed voice id %s" %
                        self.voice_id)

        def on_open(ws):
            self.status = OPENED

        query_arr = self.create_query_arr()
        if self.voice_id == "":
            query_arr['voice_id'] = str(uuid.uuid1())
            self.voice_id = query_arr['voice_id']
        query = sorted(query_arr.items(), key=lambda d: d[0])
        signstr = self.format_sign_string(query)
        autho = self.sign(signstr, self.credential.secret_key)
        requrl = self.create_query_string(query_arr)
        # The request URL carries the secret id and the signature, so it is
        # deliberately not printed or logged.
        autho = quote_autho(autho)
        requrl += "&signature=%s" % autho
        self.ws = websocket.WebSocketApp(requrl, None,
                                         on_error=on_error, on_close=on_close,
                                         on_message=on_message)
        self.ws.on_open = on_open
        self.wst = threading.Thread(target=self.ws.run_forever)
        self.wst.daemon = True
        # Publish STARTED before the thread runs so a fast on_open/on_error
        # cannot have its status overwritten by this assignment.
        self.status = STARTED
        self.wst.start()
        response = {'voice_id': self.voice_id}
        self.listener.on_recognition_start(response)
        logger.info("%s recognition start" % response['voice_id'])
|
|
File without changes
|