smartpi 1.1.3__py3-none-any.whl → 1.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- smartpi/__init__.pyc +0 -0
- smartpi/_gui.pyc +0 -0
- smartpi/ai_asr.pyc +0 -0
- smartpi/ai_llm.pyc +0 -0
- smartpi/ai_tts.pyc +0 -0
- smartpi/ai_vad.pyc +0 -0
- smartpi/audio.pyc +0 -0
- smartpi/base_driver.pyc +0 -0
- smartpi/camera.pyc +0 -0
- smartpi/color_sensor.pyc +0 -0
- smartpi/cw2015.pyc +0 -0
- smartpi/flash.pyc +0 -0
- smartpi/humidity.pyc +0 -0
- smartpi/led.pyc +0 -0
- smartpi/light_sensor.pyc +0 -0
- smartpi/local_model.pyc +0 -0
- smartpi/mcp_client.pyc +0 -0
- smartpi/mcp_fastmcp.pyc +0 -0
- smartpi/mcp_intent_recognizer.pyc +0 -0
- smartpi/models/__init__.pyc +0 -0
- smartpi/models/snakers4_silero-vad/__init__.pyc +0 -0
- smartpi/models/snakers4_silero-vad/hubconf.pyc +0 -0
- smartpi/motor.pyc +0 -0
- smartpi/move.pyc +0 -0
- smartpi/onnx_hand_workflow.pyc +0 -0
- smartpi/onnx_image_workflow.pyc +0 -0
- smartpi/onnx_pose_workflow.pyc +0 -0
- smartpi/onnx_text_workflow.pyc +0 -0
- smartpi/onnx_voice_workflow.pyc +0 -0
- smartpi/posemodel/__init__.pyc +0 -0
- smartpi/posenet_utils.pyc +0 -0
- smartpi/rknn_hand_workflow.pyc +0 -0
- smartpi/rknn_image_workflow.pyc +0 -0
- smartpi/rknn_pose_workflow.pyc +0 -0
- smartpi/rknn_text_workflow.pyc +0 -0
- smartpi/rknn_voice_workflow.pyc +0 -0
- smartpi/servo.pyc +0 -0
- smartpi/temperature.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/__init__.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/asr/__init__.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/asr/flash_recognizer.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/asr/speech_recognizer.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/common/__init__.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/common/credential.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/common/log.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/common/utils.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/soe/__init__.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/soe/speaking_assessment.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/tts/__init__.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/tts/flowing_speech_synthesizer.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/tts/speech_synthesizer.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/tts/speech_synthesizer_ws.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/vc/__init__.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/vc/speech_convertor_ws.pyc +0 -0
- smartpi/text_gte_model/__init__.pyc +0 -0
- smartpi/text_gte_model/config/__init__.pyc +0 -0
- smartpi/text_gte_model/gte/__init__.pyc +0 -0
- smartpi/touch_sensor.pyc +0 -0
- smartpi/trace.pyc +0 -0
- smartpi/ultrasonic.pyc +0 -0
- {smartpi-1.1.3.dist-info → smartpi-1.1.4.dist-info}/METADATA +1 -1
- smartpi-1.1.4.dist-info/RECORD +77 -0
- smartpi/__init__.py +0 -8
- smartpi/_gui.py +0 -66
- smartpi/ai_asr.py +0 -1037
- smartpi/ai_llm.py +0 -934
- smartpi/ai_tts.py +0 -938
- smartpi/ai_vad.py +0 -83
- smartpi/audio.py +0 -125
- smartpi/base_driver.py +0 -618
- smartpi/camera.py +0 -84
- smartpi/color_sensor.py +0 -18
- smartpi/cw2015.py +0 -179
- smartpi/flash.py +0 -130
- smartpi/humidity.py +0 -20
- smartpi/led.py +0 -19
- smartpi/light_sensor.py +0 -72
- smartpi/local_model.py +0 -432
- smartpi/mcp_client.py +0 -100
- smartpi/mcp_fastmcp.py +0 -322
- smartpi/mcp_intent_recognizer.py +0 -408
- smartpi/models/__init__.py +0 -0
- smartpi/models/snakers4_silero-vad/__init__.py +0 -0
- smartpi/models/snakers4_silero-vad/hubconf.py +0 -56
- smartpi/motor.py +0 -177
- smartpi/move.py +0 -218
- smartpi/onnx_hand_workflow.py +0 -201
- smartpi/onnx_image_workflow.py +0 -176
- smartpi/onnx_pose_workflow.py +0 -482
- smartpi/onnx_text_workflow.py +0 -173
- smartpi/onnx_voice_workflow.py +0 -437
- smartpi/posemodel/__init__.py +0 -0
- smartpi/posenet_utils.py +0 -222
- smartpi/rknn_hand_workflow.py +0 -245
- smartpi/rknn_image_workflow.py +0 -405
- smartpi/rknn_pose_workflow.py +0 -592
- smartpi/rknn_text_workflow.py +0 -240
- smartpi/rknn_voice_workflow.py +0 -394
- smartpi/servo.py +0 -178
- smartpi/temperature.py +0 -18
- smartpi/tencentcloud-speech-sdk-python/__init__.py +0 -1
- smartpi/tencentcloud-speech-sdk-python/asr/__init__.py +0 -0
- smartpi/tencentcloud-speech-sdk-python/asr/flash_recognizer.py +0 -178
- smartpi/tencentcloud-speech-sdk-python/asr/speech_recognizer.py +0 -311
- smartpi/tencentcloud-speech-sdk-python/common/__init__.py +0 -1
- smartpi/tencentcloud-speech-sdk-python/common/credential.py +0 -6
- smartpi/tencentcloud-speech-sdk-python/common/log.py +0 -16
- smartpi/tencentcloud-speech-sdk-python/common/utils.py +0 -7
- smartpi/tencentcloud-speech-sdk-python/soe/__init__.py +0 -0
- smartpi/tencentcloud-speech-sdk-python/soe/speaking_assessment.py +0 -276
- smartpi/tencentcloud-speech-sdk-python/tts/__init__.py +0 -0
- smartpi/tencentcloud-speech-sdk-python/tts/flowing_speech_synthesizer.py +0 -294
- smartpi/tencentcloud-speech-sdk-python/tts/speech_synthesizer.py +0 -144
- smartpi/tencentcloud-speech-sdk-python/tts/speech_synthesizer_ws.py +0 -234
- smartpi/tencentcloud-speech-sdk-python/vc/__init__.py +0 -0
- smartpi/tencentcloud-speech-sdk-python/vc/speech_convertor_ws.py +0 -237
- smartpi/text_gte_model/__init__.py +0 -0
- smartpi/text_gte_model/config/__init__.py +0 -0
- smartpi/text_gte_model/gte/__init__.py +0 -0
- smartpi/touch_sensor.py +0 -16
- smartpi/trace.py +0 -120
- smartpi/ultrasonic.py +0 -20
- smartpi-1.1.3.dist-info/RECORD +0 -77
- {smartpi-1.1.3.dist-info → smartpi-1.1.4.dist-info}/WHEEL +0 -0
- {smartpi-1.1.3.dist-info → smartpi-1.1.4.dist-info}/top_level.txt +0 -0
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
import logging.handlers
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
# Record layout shared by the root logger and the rotating log file.
FORMAT = '%(asctime)15s %(name)s-%(levelname)s %(funcName)s:%(lineno)s %(message)s'

# Configure the root logger at import time: DEBUG and above, using FORMAT.
logging.basicConfig(format=FORMAT, level=logging.DEBUG)

# Rotating file written to the relative path 'tencent_speech.log':
# at most 1024 * 1024 bytes per file, keeping 5 rotated backups.
handler = logging.handlers.RotatingFileHandler(
    'tencent_speech.log',
    maxBytes=1024 * 1024,
    backupCount=5,
    encoding='utf-8',
)
handler.setFormatter(logging.Formatter(FORMAT))
handler.setLevel(logging.DEBUG)

# Logger imported by the SDK modules. Its own level is INFO, so DEBUG records
# are dropped by the logger even though the handler would accept them.
logger = logging.getLogger('tencent_speech.log')
logger.setLevel('INFO')
logger.addHandler(handler)
|
|
16
|
-
|
|
File without changes
|
|
@@ -1,276 +0,0 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
import sys
|
|
3
|
-
import hmac
|
|
4
|
-
import hashlib
|
|
5
|
-
import base64
|
|
6
|
-
import time
|
|
7
|
-
import json
|
|
8
|
-
import threading
|
|
9
|
-
import urllib
|
|
10
|
-
|
|
11
|
-
import websocket
|
|
12
|
-
import uuid
|
|
13
|
-
from urllib.parse import quote
|
|
14
|
-
from common.log import logger
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
def is_python3():
    """Return True when the interpreter's major version is 3 or higher.

    The check uses the numeric ``sys.version_info`` tuple instead of
    comparing the ``sys.version`` string with ``'3'``; a string comparison
    is lexicographic and would misreport any two-digit major version.
    """
    return sys.version_info[0] >= 3
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
# 实时识别语音使用
|
|
24
|
-
class SpeakingAssessmentListener:
    """Callback interface for speaking-assessment events; every hook is a no-op.

    Subclass and override the hooks you need. Each hook receives the
    ``response`` dict for the event:

    * ``on_recognition_start`` receives only the ``voice_id`` field.
    * ``on_fail`` receives only ``voice_id``, ``code`` and ``message``.
    * ``on_recognition_complete`` has no ``result`` field.
    * All other messages contain every field.

    Response fields::

        code        Integer
        message     String
        voice_id    String
        message_id  String
        result
        final       Integer

    Result structure::

        slice_type      Integer
        index           Integer
        start_time      Integer
        end_time        Integer
        voice_text_str  String
        word_size       Integer
        word_list       Word Array

    Word structure::

        word         String
        start_time   Integer
        end_time     Integer
        stable_flag  Integer
    """

    def on_recognition_start(self, response):
        """Called once when an assessment session has been started."""
        return None

    def on_intermediate_result(self, response):
        """Called for each non-final message that carries a result."""
        return None

    def on_recognition_complete(self, response):
        """Called when the final message of the session has arrived."""
        return None

    def on_fail(self, response):
        """Called when the server reports a non-zero ``code``."""
        return None
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
# Connection states stored in SpeakingAssessment.status, numbered 0..5.
(
    NOTOPEN,   # 0: initial value assigned in __init__
    STARTED,   # 1: assigned by start()
    OPENED,    # 2: assigned by the on_open callback
    FINAL,     # 3: assigned when a message with final == 1 arrives
    ERROR,     # 4: assigned by the on_error callback
    CLOSED,    # 5: assigned by the on_close callback
) = range(6)
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
def quote_autho(autho):
    """Percent-encode *autho* so it can be embedded in a URL query string.

    Dispatches on the interpreter version: ``urllib.quote`` on Python 2,
    ``urllib.parse.quote`` on Python 3.
    """
    if sys.version_info < (3, 0):
        return urllib.quote(autho)
    from urllib import parse as url_parse
    return url_parse.quote(autho)
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
# 实时识别使用
|
|
85
|
-
class SpeakingAssessment:
    """Streaming client for the speaking-assessment WebSocket endpoint.

    Usage: construct, adjust options with the ``set_*`` methods, call
    :meth:`start`, push audio with :meth:`write`, then call :meth:`stop`.
    Events are delivered to ``listener`` through ``on_recognition_start``,
    ``on_intermediate_result``, ``on_recognition_complete`` and ``on_fail``.

    Args:
        appid: Application id; it becomes the last path segment of the URL.
        credential: Object exposing ``secret_id``, ``secret_key`` and ``token``.
        engine_model_type: Sent as the ``server_engine_type`` query parameter.
        listener: Object that receives the callbacks listed above.
    """

    # Host and path shared by the signed string and the connection URL.
    _ENDPOINT = "soe.cloud.tencent.com/soe/api/"

    def __init__(self, appid, credential, engine_model_type, listener):
        self.result = ""          # raw text of the final message, set in start()
        self.credential = credential
        self.appid = appid
        self.server_engine_type = engine_model_type
        self.status = NOTOPEN
        self.ws = None            # websocket.WebSocketApp created by start()
        self.wst = None           # thread running ws.run_forever
        self.voice_id = ""        # generated in start() when left empty
        self.new_start = 0
        self.listener = listener
        self.text_mode = 0
        self.ref_text = ""
        self.keyword = ""
        self.eval_mode = 0
        self.score_coeff = 1.0
        self.sentence_info_enabled = 0
        self.voice_format = 0
        self.nonce = ""           # falls back to the timestamp when empty
        self.rec_mode = 0

    def set_text_mode(self, text_mode):
        """Set the ``text_mode`` query parameter."""
        self.text_mode = text_mode

    def set_rec_mode(self, rec_mode):
        """Set the ``rec_mode`` query parameter."""
        self.rec_mode = rec_mode

    def set_ref_text(self, ref_text):
        """Set the ``ref_text`` query parameter."""
        self.ref_text = ref_text

    def set_keyword(self, keyword):
        """Set the ``keyword`` query parameter."""
        self.keyword = keyword

    def set_eval_mode(self, eval_mode):
        """Set the ``eval_mode`` query parameter."""
        self.eval_mode = eval_mode

    def set_sentence_info_enabled(self, sentence_info_enabled):
        """Set the ``sentence_info_enabled`` query parameter."""
        self.sentence_info_enabled = sentence_info_enabled

    def set_voice_format(self, voice_format):
        """Set the ``voice_format`` query parameter."""
        self.voice_format = voice_format

    def set_nonce(self, nonce):
        """Set an explicit ``nonce``; when empty the timestamp is used."""
        self.nonce = nonce

    def format_sign_string(self, param):
        """Build the plain-text string that gets signed for the request.

        Args:
            param: Sequence of ``(key, value)`` pairs in the order they must
                appear; :meth:`start` passes them sorted by key.

        Returns:
            ``soe.cloud.tencent.com/soe/api/<appid>?k1=v1&k2=v2`` where the
            ``appid`` pair supplies the path segment and is left out of the
            query. Values are not URL-encoded. With no other pairs the
            trailing ``?`` is omitted.
        """
        appid = None
        pairs = []
        for item in param:
            # Match on the key only. A membership test over the whole tuple
            # would also swallow a pair whose *value* equals 'appid'.
            if item[0] == 'appid':
                if appid is None:
                    appid = str(item[1])
                continue
            pairs.append("=".join(str(part) for part in item))
        prefix = self._ENDPOINT + (appid or "")
        if not pairs:
            return prefix
        return prefix + "?" + "&".join(pairs)

    def create_query_string(self, param):
        """Build the ``wss://`` connection URL (without the signature).

        Args:
            param: Dict of query parameters, including ``appid``.

        Returns:
            The URL with ``appid`` as the last path segment and every other
            value percent-encoded, in the dict's iteration order.
        """
        appid = str(param['appid']) if 'appid' in param else ""
        pairs = [
            str(key) + "=" + quote_autho(str(value))
            for key, value in param.items()
            if key != 'appid'
        ]
        url = "wss://" + self._ENDPOINT + appid
        if not pairs:
            return url
        return url + "?" + "&".join(pairs)

    def sign(self, signstr, secret_key):
        """Return the base64-encoded HMAC-SHA1 of *signstr* keyed by *secret_key*."""
        digest = hmac.new(secret_key.encode('utf-8'),
                          signstr.encode('utf-8'), hashlib.sha1).digest()
        return base64.b64encode(digest).decode('utf-8')

    def create_query_arr(self):
        """Collect the request parameters into a dict.

        ``token`` is included only when the credential has a non-empty one,
        ``nonce`` defaults to the timestamp, and ``expired`` is the timestamp
        plus 24 hours.
        """
        # Read the clock once so timestamp and expiry are exactly 24 h apart.
        now = int(time.time())

        query_arr = dict()
        query_arr['appid'] = self.appid
        query_arr['server_engine_type'] = self.server_engine_type
        query_arr['text_mode'] = self.text_mode
        query_arr['rec_mode'] = self.rec_mode
        query_arr['ref_text'] = self.ref_text
        query_arr['keyword'] = self.keyword
        query_arr['eval_mode'] = self.eval_mode
        query_arr['score_coeff'] = self.score_coeff
        query_arr['sentence_info_enabled'] = self.sentence_info_enabled
        query_arr['secretid'] = self.credential.secret_id
        if self.credential.token != "":
            query_arr['token'] = self.credential.token
        query_arr['voice_format'] = self.voice_format
        query_arr['voice_id'] = self.voice_id
        query_arr['timestamp'] = str(now)
        if self.nonce != "":
            query_arr['nonce'] = self.nonce
        else:
            query_arr['nonce'] = query_arr['timestamp']
        query_arr['expired'] = now + 24 * 60 * 60
        return query_arr

    def stop(self):
        """Send the ``end`` message, wait for the socket thread, then close.

        Does nothing when :meth:`start` was never called.
        """
        if self.status == OPENED:
            self.ws.sock.send(json.dumps({'type': "end"}))
        if self.ws:
            # The thread ends once the connection closes (on_message closes it
            # after the final frame), so join before closing from this side.
            if self.wst and self.wst.is_alive():
                self.wst.join()
            self.ws.close()

    def write(self, data):
        """Send one chunk of binary audio data.

        Blocks (polling every 0.1 s) while the connection is still being
        established. The chunk is silently dropped when the connection is in
        any state other than OPENED.
        """
        while self.status == STARTED:
            time.sleep(0.1)
        if self.status == OPENED:
            self.ws.sock.send_binary(data)

    def start(self):
        """Open the WebSocket connection on a daemon thread and return.

        Generates a ``voice_id`` when none is set, signs the request, starts
        the socket thread, and calls ``listener.on_recognition_start``.
        """
        def on_message(ws, message):
            response = json.loads(message)
            response['voice_id'] = self.voice_id
            if response['code'] != 0:
                logger.error("%s server recognition fail %s",
                             response['voice_id'], response['message'])
                self.listener.on_fail(response)
                return
            if response.get("final") == 1:
                self.status = FINAL
                self.result = message
                self.listener.on_recognition_complete(response)
                logger.info("%s recognition complete", response['voice_id'])
                self.ws.close()
                return
            # .get(): a frame without a 'result' key must not raise KeyError
            # inside the socket thread.
            if response.get("result") is not None:
                self.listener.on_intermediate_result(response)
                logger.info("%s recognition doing", response['voice_id'])

        def on_error(ws, error):
            # Errors raised while closing after the final frame are expected.
            if self.status == FINAL:
                return
            logger.error("websocket error %s voice id %s",
                         format(error), self.voice_id)
            self.status = ERROR

        def on_close(ws, close_status_code=None, close_msg=None):
            # websocket-client >= 1.0 passes the close code and message;
            # older releases pass only ws. The defaults accept both.
            self.status = CLOSED
            logger.info("websocket closed voice id %s", self.voice_id)

        def on_open(ws):
            self.status = OPENED

        query_arr = self.create_query_arr()
        if self.voice_id == "":
            query_arr['voice_id'] = str(uuid.uuid1())
            self.voice_id = query_arr['voice_id']
        query = sorted(query_arr.items(), key=lambda d: d[0])
        signstr = self.format_sign_string(query)
        autho = self.sign(signstr, self.credential.secret_key)
        # The URL carries the secret id, token and signature, so it is
        # deliberately not printed or logged.
        requrl = self.create_query_string(query_arr)
        requrl += "&signature=%s" % quote_autho(autho)

        self.ws = websocket.WebSocketApp(requrl, None,
                                         on_error=on_error, on_close=on_close,
                                         on_message=on_message)
        self.ws.on_open = on_open
        # Mark STARTED before the thread runs: assigning it afterwards could
        # overwrite the OPENED state set by on_open and leave write() waiting
        # forever.
        self.status = STARTED
        self.wst = threading.Thread(target=self.ws.run_forever)
        self.wst.daemon = True
        self.wst.start()
        response = {'voice_id': self.voice_id}
        self.listener.on_recognition_start(response)
        logger.info("%s recognition start", response['voice_id'])
|
|
File without changes
|
|
@@ -1,294 +0,0 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
import sys
|
|
3
|
-
import hmac
|
|
4
|
-
import hashlib
|
|
5
|
-
import base64
|
|
6
|
-
import time
|
|
7
|
-
import json
|
|
8
|
-
import threading
|
|
9
|
-
import websocket
|
|
10
|
-
import uuid
|
|
11
|
-
import urllib
|
|
12
|
-
from common.log import logger
|
|
13
|
-
from common.utils import is_python3
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
# Pieces of the streaming TTS WebSocket endpoint. _HOST and _PATH are also
# part of the string that gets signed; _ACTION is sent as the 'Action' parameter.
_PROTOCOL, _HOST, _PATH = "wss://", "tts.cloud.tencent.com", "/stream_wsv2"
_ACTION = "TextToStreamAudioWSv2"
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
class FlowingSpeechSynthesisListener(object):
    """Default listener for streaming synthesis: every callback only logs.

    Subclass and override the callbacks to consume audio or subtitles.
    """

    def on_synthesis_start(self, session_id):
        """Log the id of the session that was just started."""
        message = "on_synthesis_start: session_id={}".format(session_id)
        logger.info(message)

    def on_synthesis_end(self):
        """Log that synthesis has finished."""
        logger.info("on_synthesis_end: -")

    def on_audio_result(self, audio_bytes):
        """Log the size of a received chunk of audio bytes."""
        size = len(audio_bytes)
        logger.info("on_audio_result: recv audio bytes, len={}".format(size))

    def on_text_result(self, response):
        """Log the ids of a text response together with its subtitles.

        ``response`` must contain ``session_id``, ``request_id``,
        ``message_id`` and ``result``; an absent or empty ``subtitles`` entry
        is logged as an empty list.
        """
        session_id = response["session_id"]
        request_id = response["request_id"]
        message_id = response["message_id"]
        payload = response['result']
        has_subtitles = "subtitles" in payload and len(payload["subtitles"]) > 0
        subtitles = payload["subtitles"] if has_subtitles else []
        template = "on_text_result: session_id={} request_id={} message_id={}\nsubtitles={}"
        logger.info(template.format(session_id, request_id, message_id, subtitles))

    def on_synthesis_fail(self, response):
        """Log the ``code`` and ``message`` of a failed synthesis response."""
        code, msg = response['code'], response['message']
        logger.error("on_synthesis_fail: code={} msg={}".format(code, msg))
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
# Connection states stored in FlowingSpeechSynthesizer.status, numbered 0..5.
(
    NOTOPEN,   # 0: initial value assigned in __init__
    STARTED,   # 1: assigned by start() before the socket thread runs
    OPENED,    # 2: assigned by the _on_open callback
    FINAL,     # 3: assigned when a frame with final == 1 arrives
    ERROR,     # 4: assigned by the _on_error callback
    CLOSED,    # 5: assigned by the _on_close callback
) = range(6)

# Values of the "action" field in outgoing messages; these are the defaults
# of process(), complete() and reset() respectively.
FlowingSpeechSynthesizer_ACTION_SYNTHESIS = "ACTION_SYNTHESIS"
FlowingSpeechSynthesizer_ACTION_COMPLETE = "ACTION_COMPLETE"
FlowingSpeechSynthesizer_ACTION_RESET = "ACTION_RESET"
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
class FlowingSpeechSynthesizer:
    """Streaming text-to-speech client for the ``stream_wsv2`` WebSocket endpoint.

    Usage: construct, adjust options with the ``set_*`` methods, call
    :meth:`start`, wait for the server with :meth:`wait_ready`, send text with
    :meth:`process`, finish with :meth:`complete`, then :meth:`wait` for the
    connection thread to end. Audio and events are delivered to ``listener``.

    Args:
        appid: Application id; converted with ``int()`` when the request is built.
        credential: Object exposing ``secret_id`` and ``secret_key``.
        listener: Object providing ``on_synthesis_start``, ``on_synthesis_end``,
            ``on_audio_result``, ``on_text_result`` and ``on_synthesis_fail``.
    """

    def __init__(self, appid, credential, listener):
        self.appid = appid
        self.credential = credential
        self.status = NOTOPEN
        self.ws = None       # websocket.WebSocketApp created by start()
        self.wst = None      # thread running ws.run_forever
        self.listener = listener

        self.ready = False   # set once the server's READY frame arrives

        self.voice_type = 0
        self.codec = "pcm"
        self.sample_rate = 16000
        self.volume = 10
        self.speed = 0
        self.session_id = ""
        self.enable_subtitle = 0
        self.emotion_category = ""
        self.emotion_intensity = 100

    def set_voice_type(self, voice_type):
        """Set the ``VoiceType`` request parameter."""
        self.voice_type = voice_type

    def set_emotion_category(self, emotion_category):
        """Set the ``EmotionCategory`` request parameter (omitted when empty)."""
        self.emotion_category = emotion_category

    def set_emotion_intensity(self, emotion_intensity):
        """Set the ``EmotionIntensity`` request parameter."""
        self.emotion_intensity = emotion_intensity

    def set_codec(self, codec):
        """Set the ``Codec`` request parameter."""
        self.codec = codec

    def set_sample_rate(self, sample_rate):
        """Set the ``SampleRate`` request parameter."""
        self.sample_rate = sample_rate

    def set_speed(self, speed):
        """Set the ``Speed`` request parameter."""
        self.speed = speed

    def set_volume(self, volume):
        """Set the ``Volume`` request parameter."""
        self.volume = volume

    def set_enable_subtitle(self, enable_subtitle):
        """Set the ``EnableSubtitle`` request parameter."""
        self.enable_subtitle = enable_subtitle

    def __gen_signature(self, params):
        """Return the base64-encoded HMAC-SHA1 signature for *params*.

        The signed text is ``GET<host><path>?k1=v1&k2=v2`` with keys in sorted
        order and values not URL-encoded. The signing string is derived from
        request credentials, so it is deliberately not printed or logged.
        """
        query = "&".join(
            key + "=" + str(params[key]) for key in sorted(params.keys())
        )
        sign_str = "GET" + _HOST + _PATH + "?" + query
        if is_python3():
            secret_key = self.credential.secret_key.encode('utf-8')
            sign_str = sign_str.encode('utf-8')
        else:
            secret_key = self.credential.secret_key
        digest = hmac.new(secret_key, sign_str, hashlib.sha1).digest()
        return base64.b64encode(digest).decode('utf-8')

    def __gen_params(self, session_id):
        """Record *session_id* and build the request parameter dict.

        ``Expired`` is the timestamp plus 24 hours.
        """
        self.session_id = session_id

        params = dict()
        params['Action'] = _ACTION
        params['AppId'] = int(self.appid)
        params['SecretId'] = self.credential.secret_id
        params['ModelType'] = 1
        params['VoiceType'] = self.voice_type
        params['Codec'] = self.codec
        params['SampleRate'] = self.sample_rate
        params['Speed'] = self.speed
        params['Volume'] = self.volume
        params['SessionId'] = self.session_id
        params['EnableSubtitle'] = self.enable_subtitle
        # NOTE(review): both emotion parameters are assumed to be sent only
        # when a category is set - confirm against the upstream SDK.
        if self.emotion_category != "":
            params['EmotionCategory'] = self.emotion_category
            params['EmotionIntensity'] = self.emotion_intensity

        timestamp = int(time.time())
        params['Timestamp'] = timestamp
        params['Expired'] = timestamp + 24 * 60 * 60
        return params

    def __create_query_string(self, param):
        """Return the connection URL with *param* as a key-sorted query string."""
        pairs = sorted(param.items(), key=lambda d: d[0])
        query = "&".join(str(key) + "=" + str(value) for key, value in pairs)
        return _PROTOCOL + _HOST + _PATH + "?" + query

    def __new_ws_request_message(self, action, data):
        """Build the dict for one outgoing message, with a fresh ``message_id``."""
        return {
            "session_id": self.session_id,
            "message_id": str(uuid.uuid1()),

            "action": action,
            "data": data,
        }

    def __do_send(self, action, text):
        """Serialize one request message to JSON and send it as a text frame."""
        data = json.dumps(self.__new_ws_request_message(action, text))
        opcode = websocket.ABNF.OPCODE_TEXT
        logger.info("ws send opcode={} data={}".format(opcode, data))
        self.ws.send(data, opcode)

    def process(self, text, action=FlowingSpeechSynthesizer_ACTION_SYNTHESIS):
        """Send *text* to the server with the given action."""
        logger.info("process: action={} data={}".format(action, text))
        self.__do_send(action, text)

    def complete(self, action = FlowingSpeechSynthesizer_ACTION_COMPLETE):
        """Send the completion action with an empty payload."""
        logger.info("complete: action={}".format(action))
        self.__do_send(action, "")

    def reset(self, action = FlowingSpeechSynthesizer_ACTION_RESET):
        """Send the reset action with an empty payload."""
        logger.info("reset: action={}".format(action))
        self.__do_send(action, "")

    def wait_ready(self, timeout_ms):
        """Poll every 10 ms until the READY frame has been received.

        Args:
            timeout_ms: Maximum time to wait, in milliseconds.

        Returns:
            True once ``self.ready`` is set, False when the timeout elapses.
        """
        timeout_start = int(time.time() * 1000)
        while True:
            if self.ready:
                return True
            if int(time.time() * 1000) - timeout_start > timeout_ms:
                break
            time.sleep(0.01)
        return False

    def start(self):
        """Open the WebSocket connection on a daemon thread and return.

        Generates a new session id, signs the request, starts the socket
        thread, and calls ``listener.on_synthesis_start(session_id)``.
        """
        logger.info("synthesizer start: begin")

        def _close_conn(reason):
            ta = time.time()
            self.ws.close()
            tb = time.time()
            logger.info("client has closed connection ({}), cost {} ms".format(reason, int((tb-ta)*1000)))

        def _on_data(ws, data, opcode, flag):
            logger.debug("data={} opcode={} flag={}".format(data, opcode, flag))
            if opcode == websocket.ABNF.OPCODE_BINARY:
                # Binary frames carry synthesized audio bytes.
                self.listener.on_audio_result(data)
            elif opcode == websocket.ABNF.OPCODE_TEXT:
                resp = json.loads(data)
                if resp['code'] != 0:
                    logger.error("server synthesis fail request_id={} code={} msg={}".format(
                        resp['request_id'], resp['code'], resp['message']
                    ))
                    self.listener.on_synthesis_fail(resp)
                    return
                if "final" in resp and resp['final'] == 1:
                    logger.info("recv FINAL frame")
                    self.status = FINAL
                    _close_conn("after recv final")
                    self.listener.on_synthesis_end()
                    return
                if "ready" in resp and resp['ready'] == 1:
                    logger.info("recv READY frame")
                    self.ready = True
                    return
                if "reset" in resp and resp['reset'] == 1:
                    logger.info("recv RESET frame")
                    return
                if "heartbeat" in resp and resp['heartbeat'] == 1:
                    logger.info("recv HEARTBEAT frame")
                    return
                if "result" in resp:
                    if "subtitles" in resp["result"] and resp["result"]["subtitles"] is not None:
                        self.listener.on_text_result(resp)
                    return
            else:
                # The original message had no '{}' placeholder, so the opcode
                # was silently dropped from the log line.
                logger.error("invalid on_data code, opcode={}".format(opcode))

        def _on_error(ws, error):
            # Errors seen after the final frame or after closing are expected.
            if self.status == FINAL or self.status == CLOSED:
                return
            self.status = ERROR
            logger.error("error={}, session_id={}".format(error, self.session_id))
            _close_conn("after recv error")

        def _on_close(ws, close_status_code, close_msg):
            logger.info("conn closed, close_status_code={} close_msg={}".format(close_status_code, close_msg))
            self.status = CLOSED

        def _on_open(ws):
            logger.info("conn opened")
            self.status = OPENED

        session_id = str(uuid.uuid1())
        params = self.__gen_params(session_id)
        signature = self.__gen_signature(params)
        requrl = self.__create_query_string(params)

        # Import quote explicitly: a bare 'import urllib' does not guarantee
        # that the urllib.parse submodule has been loaded.
        if is_python3():
            from urllib.parse import quote
        else:
            from urllib import quote
        # The URL carries the secret id and signature, so it is deliberately
        # not printed or logged.
        requrl += "&Signature=%s" % quote(signature)

        self.ws = websocket.WebSocketApp(requrl, None,
                                         on_error=_on_error, on_close=_on_close,
                                         on_data=_on_data)
        self.ws.on_open = _on_open

        # STARTED is assigned before the thread runs so that _on_open's OPENED
        # cannot be overwritten afterwards.
        self.status = STARTED
        self.wst = threading.Thread(target=self.ws.run_forever)
        self.wst.daemon = True
        self.wst.start()
        self.listener.on_synthesis_start(session_id)

        logger.info("synthesizer start: end")

    def wait(self):
        """Block until the connection thread started by :meth:`start` has ended."""
        logger.info("synthesizer wait: begin")
        if self.ws:
            if self.wst and self.wst.is_alive():
                self.wst.join()
        logger.info("synthesizer wait: end")
|