smartpi 0.1.40__py3-none-any.whl → 0.1.42__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. smartpi/__init__.py +1 -1
  2. smartpi/ai_asr.py +1037 -0
  3. smartpi/ai_llm.py +934 -0
  4. smartpi/ai_tts.py +938 -0
  5. smartpi/ai_vad.py +83 -0
  6. smartpi/base_driver.py +265 -11
  7. smartpi/local_model.py +432 -0
  8. smartpi/mcp_client.py +100 -0
  9. smartpi/mcp_fastmcp.py +322 -0
  10. smartpi/mcp_intent_recognizer.py +408 -0
  11. smartpi/models/__init__.py +0 -0
  12. smartpi/models/snakers4_silero-vad/__init__.py +0 -0
  13. smartpi/models/snakers4_silero-vad/hubconf.py +56 -0
  14. smartpi/models/snakers4_silero-vad/src/silero_vad/data/silero_vad.jit +0 -0
  15. smartpi/models/snakers4_silero-vad/src/silero_vad/data/silero_vad.onnx +0 -0
  16. smartpi/models/snakers4_silero-vad/src/silero_vad/data/silero_vad_16k_op15.onnx +0 -0
  17. smartpi/models/snakers4_silero-vad/src/silero_vad/data/silero_vad_half.onnx +0 -0
  18. smartpi/onnx_pose_workflow.py +1 -1
  19. smartpi/rknn_pose_workflow.py +1 -1
  20. smartpi/tencentcloud-speech-sdk-python/__init__.py +1 -0
  21. smartpi/tencentcloud-speech-sdk-python/asr/__init__.py +0 -0
  22. smartpi/tencentcloud-speech-sdk-python/asr/flash_recognizer.py +178 -0
  23. smartpi/tencentcloud-speech-sdk-python/asr/speech_recognizer.py +311 -0
  24. smartpi/tencentcloud-speech-sdk-python/common/__init__.py +1 -0
  25. smartpi/tencentcloud-speech-sdk-python/common/credential.py +6 -0
  26. smartpi/tencentcloud-speech-sdk-python/common/log.py +16 -0
  27. smartpi/tencentcloud-speech-sdk-python/common/utils.py +7 -0
  28. smartpi/tencentcloud-speech-sdk-python/examples/tts/tts_text.txt +60 -0
  29. smartpi/tencentcloud-speech-sdk-python/soe/__init__.py +0 -0
  30. smartpi/tencentcloud-speech-sdk-python/soe/speaking_assessment.py +276 -0
  31. smartpi/tencentcloud-speech-sdk-python/tts/__init__.py +0 -0
  32. smartpi/tencentcloud-speech-sdk-python/tts/flowing_speech_synthesizer.py +294 -0
  33. smartpi/tencentcloud-speech-sdk-python/tts/speech_synthesizer.py +144 -0
  34. smartpi/tencentcloud-speech-sdk-python/tts/speech_synthesizer_ws.py +234 -0
  35. smartpi/tencentcloud-speech-sdk-python/vc/__init__.py +0 -0
  36. smartpi/tencentcloud-speech-sdk-python/vc/speech_convertor_ws.py +237 -0
  37. {smartpi-0.1.40.dist-info → smartpi-0.1.42.dist-info}/METADATA +1 -1
  38. smartpi-0.1.42.dist-info/RECORD +76 -0
  39. smartpi-0.1.40.dist-info/RECORD +0 -44
  40. {smartpi-0.1.40.dist-info → smartpi-0.1.42.dist-info}/WHEEL +0 -0
  41. {smartpi-0.1.40.dist-info → smartpi-0.1.42.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,294 @@
1
+ # -*- coding: utf-8 -*-
2
+ import sys
3
+ import hmac
4
+ import hashlib
5
+ import base64
6
+ import time
7
+ import json
8
+ import threading
9
+ import websocket
10
+ import uuid
11
+ import urllib
12
+ from common.log import logger
13
+ from common.utils import is_python3
14
+
15
+
16
# Endpoint pieces and action name used by FlowingSpeechSynthesizer: _HOST and
# _PATH go into the signing string, _PROTOCOL + _HOST + _PATH form the
# websocket URL, and _ACTION is sent as the "Action" request parameter.
+ _PROTOCOL = "wss://"
17
+ _HOST = "tts.cloud.tencent.com"
18
+ _PATH = "/stream_wsv2"
19
+ _ACTION = "TextToStreamAudioWSv2"
20
+
21
+
22
class FlowingSpeechSynthesisListener(object):
    """Default callback sink for FlowingSpeechSynthesizer.

    Every callback only writes a log record through ``logger``; subclass and
    override the methods to consume the audio or subtitles.
    """

    def on_synthesis_start(self, session_id):
        """Log the session id of the synthesis that was just started."""
        message = "on_synthesis_start: session_id={}".format(session_id)
        logger.info(message)

    def on_synthesis_end(self):
        """Log that the synthesis has ended."""
        logger.info("on_synthesis_end: -")

    def on_audio_result(self, audio_bytes):
        """Log the size of a received chunk of audio bytes."""
        size = len(audio_bytes)
        logger.info("on_audio_result: recv audio bytes, len={}".format(size))

    def on_text_result(self, response):
        """Log the ids and subtitles carried by a text response.

        ``response`` must contain ``session_id``, ``request_id``,
        ``message_id`` and ``result``; a missing or empty ``subtitles`` entry
        in ``result`` is logged as an empty list.
        """
        sid = response["session_id"]
        rid = response["request_id"]
        mid = response["message_id"]
        payload = response['result']
        has_subtitles = "subtitles" in payload and len(payload["subtitles"]) > 0
        subtitles = payload["subtitles"] if has_subtitles else []
        template = "on_text_result: session_id={} request_id={} message_id={}\nsubtitles={}"
        logger.info(template.format(sid, rid, mid, subtitles))

    def on_synthesis_fail(self, response):
        """Log the ``code`` and ``message`` fields of a failure response."""
        code = response['code']
        msg = response['message']
        logger.error("on_synthesis_fail: code={} msg={}".format(code, msg))
49
+
50
+
51
# Connection lifecycle states stored in ``FlowingSpeechSynthesizer.status``.
NOTOPEN = 0
STARTED = 1
OPENED = 2
FINAL = 3
ERROR = 4
CLOSED = 5

# Values for the "action" field of the JSON messages sent to the server.
FlowingSpeechSynthesizer_ACTION_SYNTHESIS = "ACTION_SYNTHESIS"
FlowingSpeechSynthesizer_ACTION_COMPLETE = "ACTION_COMPLETE"
FlowingSpeechSynthesizer_ACTION_RESET = "ACTION_RESET"


class FlowingSpeechSynthesizer:
    """Websocket client that sends text incrementally and relays the results.

    ``start()`` opens the connection on a daemon thread; once
    ``wait_ready()`` returns True, text is sent with ``process()`` and the
    stream is finished with ``complete()``. Audio frames, subtitle results,
    failures and the end of synthesis are reported to ``listener``.
    """

    def __init__(self, appid, credential, listener):
        """Store the account data and listener and set default voice options.

        ``credential`` must expose ``secret_id`` and ``secret_key``.
        """
        self.appid = appid
        self.credential = credential
        self.status = NOTOPEN
        self.ws = None
        self.wst = None
        self.listener = listener

        # Set to True by the receive callback when the server sends "ready".
        self.ready = False

        self.voice_type = 0
        self.codec = "pcm"
        self.sample_rate = 16000
        self.volume = 10
        self.speed = 0
        self.session_id = ""
        self.enable_subtitle = 0
        self.emotion_category = ""
        self.emotion_intensity = 100

    def set_voice_type(self, voice_type):
        """Set the value sent as ``VoiceType``."""
        self.voice_type = voice_type

    def set_emotion_category(self, emotion_category):
        """Set the value sent as ``EmotionCategory`` (omitted when empty)."""
        self.emotion_category = emotion_category

    def set_emotion_intensity(self, emotion_intensity):
        """Set the value sent as ``EmotionIntensity``."""
        self.emotion_intensity = emotion_intensity

    def set_codec(self, codec):
        """Set the value sent as ``Codec``."""
        self.codec = codec

    def set_sample_rate(self, sample_rate):
        """Set the value sent as ``SampleRate``."""
        self.sample_rate = sample_rate

    def set_speed(self, speed):
        """Set the value sent as ``Speed``."""
        self.speed = speed

    def set_volume(self, volume):
        """Set the value sent as ``Volume``."""
        self.volume = volume

    def set_enable_subtitle(self, enable_subtitle):
        """Set the value sent as ``EnableSubtitle``."""
        self.enable_subtitle = enable_subtitle

    def __gen_signature(self, params):
        """Return the base64-encoded HMAC-SHA1 signature of ``params``.

        The signed string is ``"GET" + host + path + "?"`` followed by the
        ``key=value`` pairs sorted by key and joined with ``&``.
        """
        query = "&".join(
            "{}={}".format(key, params[key]) for key in sorted(params))
        sign_str = "GET" + _HOST + _PATH + "?" + query
        # The signing string is deliberately not printed: it contains the
        # SecretId and every request parameter.
        secret_key = self.credential.secret_key
        if is_python3():
            # hmac requires bytes on Python 3.
            secret_key = secret_key.encode('utf-8')
            sign_str = sign_str.encode('utf-8')
        digest = hmac.new(secret_key, sign_str, hashlib.sha1).digest()
        return base64.b64encode(digest).decode('utf-8')

    def __gen_params(self, session_id):
        """Record ``session_id`` and build the request parameter dict.

        Emotion parameters are only included when an emotion category is
        set. ``Expired`` is 24 hours after ``Timestamp``.
        """
        self.session_id = session_id

        params = dict()
        params['Action'] = _ACTION
        params['AppId'] = int(self.appid)
        params['SecretId'] = self.credential.secret_id
        params['ModelType'] = 1
        params['VoiceType'] = self.voice_type
        params['Codec'] = self.codec
        params['SampleRate'] = self.sample_rate
        params['Speed'] = self.speed
        params['Volume'] = self.volume
        params['SessionId'] = self.session_id
        params['EnableSubtitle'] = self.enable_subtitle
        if self.emotion_category != "":
            params['EmotionCategory'] = self.emotion_category
            params['EmotionIntensity'] = self.emotion_intensity

        timestamp = int(time.time())
        params['Timestamp'] = timestamp
        params['Expired'] = timestamp + 24 * 60 * 60
        return params

    def __create_query_string(self, param):
        """Return the websocket URL with ``param`` as a key-sorted query string."""
        pairs = sorted(param.items(), key=lambda item: item[0])
        query = "&".join("{}={}".format(key, value) for key, value in pairs)
        return _PROTOCOL + _HOST + _PATH + "?" + query

    def __new_ws_request_message(self, action, data):
        """Build the message dict for one websocket request."""
        return {
            "session_id": self.session_id,
            "message_id": str(uuid.uuid1()),

            "action": action,
            "data": data,
        }

    def __do_send(self, action, text):
        """Serialize one request message to JSON and send it as a text frame."""
        message = self.__new_ws_request_message(action, text)
        data = json.dumps(message)
        opcode = websocket.ABNF.OPCODE_TEXT
        logger.info("ws send opcode={} data={}".format(opcode, data))
        self.ws.send(data, opcode)

    def process(self, text, action=FlowingSpeechSynthesizer_ACTION_SYNTHESIS):
        """Send ``text`` to the server with the given action."""
        logger.info("process: action={} data={}".format(action, text))
        self.__do_send(action, text)

    def complete(self, action=FlowingSpeechSynthesizer_ACTION_COMPLETE):
        """Send the completion action with empty data."""
        logger.info("complete: action={}".format(action))
        self.__do_send(action, "")

    def reset(self, action=FlowingSpeechSynthesizer_ACTION_RESET):
        """Send the reset action with empty data."""
        logger.info("reset: action={}".format(action))
        self.__do_send(action, "")

    def wait_ready(self, timeout_ms):
        """Poll every 10 ms until the server reported ready or time runs out.

        Returns True when ``self.ready`` was set, False after ``timeout_ms``
        milliseconds without it.
        """
        timeout_start = int(time.time() * 1000)
        while True:
            if self.ready:
                return True
            if int(time.time() * 1000) - timeout_start > timeout_ms:
                break
            time.sleep(0.01)
        return False

    def start(self):
        """Open the signed websocket connection on a daemon thread.

        Returns immediately after starting the thread and notifying the
        listener via ``on_synthesis_start``; use ``wait_ready()`` before
        sending text.
        """
        logger.info("synthesizer start: begin")

        if is_python3():
            # `import urllib` alone does not guarantee that the
            # `urllib.parse` submodule is loaded, so import it explicitly.
            from urllib.parse import quote
        else:
            from urllib import quote

        def _close_conn(reason):
            ta = time.time()
            self.ws.close()
            tb = time.time()
            logger.info("client has closed connection ({}), cost {} ms".format(
                reason, int((tb - ta) * 1000)))

        def _on_data(ws, data, opcode, flag):
            logger.debug("data={} opcode={} flag={}".format(data, opcode, flag))
            if opcode == websocket.ABNF.OPCODE_BINARY:
                # Binary frames carry audio bytes.
                self.listener.on_audio_result(data)
            elif opcode == websocket.ABNF.OPCODE_TEXT:
                resp = json.loads(data)  # WSResponseMessage
                if resp['code'] != 0:
                    logger.error("server synthesis fail request_id={} code={} msg={}".format(
                        resp['request_id'], resp['code'], resp['message']
                    ))
                    self.listener.on_synthesis_fail(resp)
                    return
                if "final" in resp and resp['final'] == 1:
                    logger.info("recv FINAL frame")
                    # Mark FINAL before closing so _on_error ignores errors
                    # raised while the connection is going down.
                    self.status = FINAL
                    _close_conn("after recv final")
                    self.listener.on_synthesis_end()
                    return
                if "ready" in resp and resp['ready'] == 1:
                    logger.info("recv READY frame")
                    self.ready = True
                    return
                if "reset" in resp and resp['reset'] == 1:
                    logger.info("recv RESET frame")
                    return
                if "heartbeat" in resp and resp['heartbeat'] == 1:
                    logger.info("recv HEARTBEAT frame")
                    return
                if "result" in resp:
                    if "subtitles" in resp["result"] and resp["result"]["subtitles"] is not None:
                        self.listener.on_text_result(resp)
                    return
            else:
                logger.error("invalid on_data code, opcode={}".format(opcode))

        def _on_error(ws, error):
            if self.status == FINAL or self.status == CLOSED:
                return
            self.status = ERROR
            logger.error("error={}, session_id={}".format(error, self.session_id))
            _close_conn("after recv error")

        def _on_close(ws, close_status_code, close_msg):
            logger.info("conn closed, close_status_code={} close_msg={}".format(
                close_status_code, close_msg))
            self.status = CLOSED

        def _on_open(ws):
            logger.info("conn opened")
            self.status = OPENED

        session_id = str(uuid.uuid1())
        params = self.__gen_params(session_id)
        signature = self.__gen_signature(params)
        requrl = self.__create_query_string(params)
        # The signed URL is deliberately not printed: it embeds the SecretId
        # and a signature that stays valid until the Expired timestamp.
        requrl += "&Signature=%s" % quote(signature)

        self.ws = websocket.WebSocketApp(requrl, None,
                                         on_error=_on_error, on_close=_on_close,
                                         on_data=_on_data)
        self.ws.on_open = _on_open

        # Set STARTED before the thread runs so that it cannot overwrite a
        # state assigned by one of the callbacks.
        self.status = STARTED
        self.wst = threading.Thread(target=self.ws.run_forever)
        self.wst.daemon = True
        self.wst.start()
        self.listener.on_synthesis_start(session_id)

        logger.info("synthesizer start: end")

    def wait(self):
        """Block until the websocket thread has finished, if it is running."""
        logger.info("synthesizer wait: begin")
        if self.ws:
            if self.wst and self.wst.is_alive():
                self.wst.join()
        logger.info("synthesizer wait: end")
@@ -0,0 +1,144 @@
1
+ # -*- coding: utf-8 -*-
2
+ import sys
3
+ import hmac
4
+ import hashlib
5
+ import base64
6
+ import time
7
+ import json
8
+ import uuid
9
+ import requests
10
+
11
+
12
def is_python3():
    """Return True when running on Python 3 or newer.

    Uses ``sys.version_info`` rather than comparing the ``sys.version``
    string, which is a lexicographic comparison and would misreport any
    major version whose text sorts before ``'3'``.
    """
    return sys.version_info[0] >= 3
16
+
17
+
18
# Endpoint pieces and action name used by SpeechSynthesizer: _HOST and _PATH
# go into the signing string, _PROTOCOL + _HOST + _PATH form the POST URL,
# and _ACTION is sent as the "Action" request parameter.
+ _PROTOCOL = "https://"
19
+ _HOST = "tts.cloud.tencent.com"
20
+ _PATH = "/stream"
21
+ _ACTION = "TextToStreamAudio"
22
+
23
+
24
class SpeechSynthesisListener:
    """Callback interface for SpeechSynthesizer; every method is a no-op.

    Each callback receives a ``response`` dict:

    - every callback includes ``session_id``;
    - ``on_message`` and ``on_complete`` include ``data``;
    - ``on_fail`` includes ``Code`` and ``Message``.

    Field       Type    Description
    session_id  String  id of this request
    data        String  audio data
    Code        String  error code
    Message     String  error message
    """

    def on_message(self, response):
        """Handle an audio update; does nothing by default."""

    def on_complete(self, response):
        """Handle the end of the audio stream; does nothing by default."""

    def on_fail(self, response):
        """Handle a failure response; does nothing by default."""
46
+
47
+
48
class SpeechSynthesizer:
    """HTTP streaming text-to-speech client.

    ``synthesis()`` posts the text and forwards the streamed audio, or the
    service error, to ``listener``.
    """

    def __init__(self, appid, credential, voice_type, listener):
        """Store the account data, voice and listener; set default options.

        ``credential`` must expose ``secret_id`` and ``secret_key``.
        """
        self.appid = appid
        self.credential = credential
        self.voice_type = voice_type
        self.codec = "pcm"
        self.sample_rate = 16000
        self.volume = 0
        self.speed = 0
        self.listener = listener

    def set_voice_type(self, voice_type):
        """Set the value sent as ``VoiceType``."""
        self.voice_type = voice_type

    def set_codec(self, codec):
        """Set the value sent as ``Codec``."""
        self.codec = codec

    def set_sample_rate(self, sample_rate):
        """Set the value sent as ``SampleRate``."""
        self.sample_rate = sample_rate

    def set_speed(self, speed):
        """Set the value sent as ``Speed``."""
        self.speed = speed

    def set_volume(self, volume):
        """Set the value sent as ``Volume``."""
        self.volume = volume

    @staticmethod
    def __parse_error(chunk):
        """Return ``(code, message)`` if ``chunk`` is a JSON error body, else None.

        Anything that is not JSON, or is JSON without
        ``Response.Error.Code`` / ``Response.Error.Message``, is treated as
        audio data and yields None.
        """
        try:
            error = json.loads(chunk)["Response"]["Error"]
            return error["Code"], error["Message"]
        except (ValueError, KeyError, TypeError):
            return None

    def synthesis(self, text):
        """Synthesize ``text`` and report the result through the listener.

        If the first chunk of the response is a JSON error body,
        ``on_fail`` is called with ``Code`` and ``Message`` and the method
        returns. Otherwise ``on_message`` is called after every chunk with
        all audio received so far in ``data``, and ``on_complete`` is called
        once the stream ends.
        """
        session_id = str(uuid.uuid1())
        params = self.__gen_params(session_id, text)
        signature = self.__gen_signature(params)
        headers = {
            "Content-Type": "application/json",
            "Authorization": str(signature)
        }
        url = _PROTOCOL + _HOST + _PATH
        r = requests.post(url, headers=headers,
                          data=json.dumps(params), stream=True)
        response = {"session_id": session_id}
        data = None
        try:
            for chunk in r.iter_content(None):
                if data is None:
                    # Only the parse is guarded, so an exception raised by
                    # the listener is never mistaken for "chunk is audio".
                    error = self.__parse_error(chunk)
                    if error is not None:
                        response["Code"], response["Message"] = error
                        self.listener.on_fail(response)
                        return
                    data = chunk
                else:
                    data = data + chunk
                response["data"] = data
                self.listener.on_message(response)
        finally:
            # A streamed response keeps its connection until it is closed.
            r.close()
        response["data"] = data
        self.listener.on_complete(response)

    def __gen_signature(self, params):
        """Return the base64-encoded HMAC-SHA1 signature of ``params``.

        The signed string is ``"POST" + host + path + "?"`` followed by the
        ``key=value`` pairs sorted by key and joined with ``&``.
        """
        query = "&".join(
            "{}={}".format(key, params[key]) for key in sorted(params))
        sign_str = "POST" + _HOST + _PATH + "?" + query
        return self.__sign(sign_str, self.credential.secret_key)

    def __sign(self, signstr, secret_key):
        """Return base64(HMAC-SHA1(secret_key, signstr)) as a str."""
        digest = hmac.new(secret_key.encode('utf-8'),
                          signstr.encode('utf-8'), hashlib.sha1).digest()
        return base64.b64encode(digest).decode('utf-8')

    def __gen_params(self, session_id, text):
        """Build the request parameter dict for one synthesis call.

        ``Expired`` is 24 hours after ``Timestamp``.
        """
        params = dict()
        params['Action'] = _ACTION
        params['AppId'] = int(self.appid)
        params['SecretId'] = self.credential.secret_id
        params['ModelType'] = 1
        params['VoiceType'] = self.voice_type
        params['Codec'] = self.codec
        params['SampleRate'] = self.sample_rate
        params['Speed'] = self.speed
        params['Volume'] = self.volume
        params['SessionId'] = session_id
        params['Text'] = text

        timestamp = int(time.time())
        params['Timestamp'] = timestamp
        params['Expired'] = timestamp + 24 * 60 * 60
        return params
@@ -0,0 +1,234 @@
1
+ # -*- coding: utf-8 -*-
2
+ import sys
3
+ import hmac
4
+ import hashlib
5
+ import base64
6
+ import time
7
+ import json
8
+ import threading
9
+ from websocket import ABNF, WebSocketApp
10
+ import uuid
11
+ import urllib
12
+ from common.log import logger
13
+
14
+
15
# Endpoint pieces and action name used by SpeechSynthesizer: _HOST and _PATH
# go into the signing string, _PROTOCOL + _HOST + _PATH form the websocket
# URL, and _ACTION is sent as the "Action" request parameter.
+ _PROTOCOL = "wss://"
16
+ _HOST = "tts.cloud.tencent.com"
17
+ _PATH = "/stream_ws"
18
+ _ACTION = "TextToStreamAudioWS"
19
+
20
+
21
class SpeechSynthesisListener(object):
    """Default callback sink for the websocket SpeechSynthesizer.

    Every callback only writes a log record through ``logger``; subclass and
    override the methods to consume the audio or subtitles.
    """

    def on_synthesis_start(self, session_id):
        """Log the session id of the synthesis that was just started."""
        text = "on_synthesis_start: session_id={}".format(session_id)
        logger.info(text)

    def on_synthesis_end(self):
        """Log that the synthesis has ended."""
        logger.info("on_synthesis_end: -")

    def on_audio_result(self, audio_bytes):
        """Log the size of a received chunk of audio bytes."""
        byte_count = len(audio_bytes)
        logger.info("on_audio_result: recv audio bytes, len={}".format(byte_count))

    def on_text_result(self, response):
        """Log the ids and subtitles carried by a text response.

        ``response`` must contain ``session_id``, ``request_id``,
        ``message_id`` and ``result``; a missing or empty ``subtitles`` entry
        in ``result`` is logged as an empty list.
        """
        session = response["session_id"]
        request = response["request_id"]
        message = response["message_id"]
        body = response['result']
        if "subtitles" in body and len(body["subtitles"]) > 0:
            subtitles = body["subtitles"]
        else:
            subtitles = []
        line = "on_text_result: session_id={} request_id={} message_id={}\nsubtitles={}"
        logger.info(line.format(session, request, message, subtitles))

    def on_synthesis_fail(self, response):
        """Log the ``code`` and ``message`` fields of a failure response."""
        fields = (response['code'], response['message'])
        logger.error("on_synthesis_fail: code={} msg={}".format(*fields))
48
+
49
+
50
# Connection lifecycle states stored in ``SpeechSynthesizer.status``.
NOTOPEN = 0
STARTED = 1
OPENED = 2
FINAL = 3
ERROR = 4
CLOSED = 5


class SpeechSynthesizer:
    """Websocket text-to-speech client for a single piece of text.

    The text and voice options travel in the signed connection URL;
    ``start()`` opens the connection on a daemon thread and audio frames,
    subtitle results, failures and the end of synthesis are reported to
    ``listener``.
    """

    def __init__(self, appid, credential, listener):
        """Store the account data and listener and set default options.

        ``credential`` must expose ``secret_id`` and ``secret_key``.
        """
        self.appid = appid
        self.credential = credential
        self.status = NOTOPEN
        self.ws = None
        self.wst = None
        self.listener = listener

        self.text = "欢迎使用腾讯云实时语音合成"
        self.voice_type = 0
        self.codec = "pcm"
        self.sample_rate = 16000
        self.volume = 0
        self.speed = 0
        self.session_id = ""
        self.enable_subtitle = True
        self.fast_voice_type = ""

    def set_voice_type(self, voice_type):
        """Set the value sent as ``VoiceType``."""
        self.voice_type = voice_type

    def set_codec(self, codec):
        """Set the value sent as ``Codec``."""
        self.codec = codec

    def set_sample_rate(self, sample_rate):
        """Set the value sent as ``SampleRate``."""
        self.sample_rate = sample_rate

    def set_speed(self, speed):
        """Set the value sent as ``Speed``."""
        self.speed = speed

    def set_volume(self, volume):
        """Set the value sent as ``Volume``."""
        self.volume = volume

    def set_text(self, text):
        """Set the text sent as ``Text``."""
        self.text = text

    def set_enable_subtitle(self, enable_subtitle):
        """Set the value sent as ``EnableSubtitle``."""
        self.enable_subtitle = enable_subtitle

    def set_fast_voice_type(self, fast_voice_type):
        """Set the value sent as ``FastVoiceType`` (omitted when empty)."""
        self.fast_voice_type = fast_voice_type

    def __gen_signature(self, params):
        """Return the base64-encoded HMAC-SHA1 signature of ``params``.

        The signed string is ``"GET" + host + path + "?"`` followed by the
        ``key=value`` pairs sorted by key and joined with ``&``.
        """
        query = "&".join(
            "{}={}".format(key, params[key]) for key in sorted(params))
        sign_str = ("GET" + _HOST + _PATH + "?" + query).encode('utf-8')
        secret_key = self.credential.secret_key.encode('utf-8')
        digest = hmac.new(secret_key, sign_str, hashlib.sha1).digest()
        return base64.b64encode(digest).decode('utf-8')

    def __gen_params(self, session_id):
        """Record ``session_id`` and build the request parameter dict.

        ``FastVoiceType`` is only included when it is non-empty.
        ``Expired`` is 24 hours after ``Timestamp``.
        """
        self.session_id = session_id

        params = dict()
        params['Action'] = _ACTION
        params['AppId'] = int(self.appid)
        params['SecretId'] = self.credential.secret_id
        params['ModelType'] = 1
        params['VoiceType'] = self.voice_type
        params['Codec'] = self.codec
        params['SampleRate'] = self.sample_rate
        params['Speed'] = self.speed
        params['Volume'] = self.volume
        params['SessionId'] = self.session_id
        params['Text'] = self.text
        params['EnableSubtitle'] = self.enable_subtitle
        if len(self.fast_voice_type) > 0:
            params['FastVoiceType'] = self.fast_voice_type

        timestamp = int(time.time())
        params['Timestamp'] = timestamp
        params['Expired'] = timestamp + 24 * 60 * 60
        return params

    def __create_query_string(self, param):
        """Return the websocket URL with ``param`` as a key-sorted query string.

        ``Text`` is percent-encoded in the URL. ``param`` itself is left
        untouched; the encoding is applied to a copy.
        """
        # `import urllib` alone does not guarantee that the `urllib.parse`
        # submodule is loaded, so import it explicitly.
        from urllib.parse import quote

        encoded = dict(param)
        encoded['Text'] = quote(encoded['Text'])
        pairs = sorted(encoded.items(), key=lambda item: item[0])
        query = "&".join("{}={}".format(key, value) for key, value in pairs)
        return _PROTOCOL + _HOST + _PATH + "?" + query

    def start(self):
        """Open the signed websocket connection on a daemon thread.

        Returns immediately after starting the thread and notifying the
        listener via ``on_synthesis_start``; call ``wait()`` to block until
        the connection has finished.
        """
        from urllib.parse import quote

        logger.info("synthesizer start: begin")

        def _close_conn(reason):
            ta = time.time()
            self.ws.close()
            tb = time.time()
            logger.info("client has closed connection ({}), cost {} ms".format(
                reason, int((tb - ta) * 1000)))

        def _on_data(ws, data, opcode, flag):
            # NOTE print all message that client received
            # logger.info("data={} opcode={} flag={}".format(data, opcode, flag))
            if opcode == ABNF.OPCODE_BINARY:
                # Binary frames carry audio bytes.
                self.listener.on_audio_result(data)
            elif opcode == ABNF.OPCODE_TEXT:
                resp = json.loads(data)  # WSResponseMessage
                if resp['code'] != 0:
                    logger.error("server synthesis fail request_id={} code={} msg={}".format(
                        resp['request_id'], resp['code'], resp['message']
                    ))
                    self.listener.on_synthesis_fail(resp)
                    return
                if "final" in resp and resp['final'] == 1:
                    logger.info("recv FINAL frame")
                    # Mark FINAL before closing so _on_error ignores errors
                    # raised while the connection is going down.
                    self.status = FINAL
                    _close_conn("after recv final")
                    self.listener.on_synthesis_end()
                    return
                if "result" in resp:
                    if "subtitles" in resp["result"] and resp["result"]["subtitles"] is not None:
                        self.listener.on_text_result(resp)
                    return
            else:
                logger.error("invalid on_data code, opcode={}".format(opcode))

        def _on_error(ws, error):
            if self.status == FINAL or self.status == CLOSED:
                return
            self.status = ERROR
            logger.error("error={}, session_id={}".format(error, self.session_id))
            _close_conn("after recv error")

        def _on_close(ws, close_status_code, close_msg):
            logger.info("conn closed, close_status_code={} close_msg={}".format(
                close_status_code, close_msg))
            self.status = CLOSED

        def _on_open(ws):
            logger.info("conn opened")
            self.status = OPENED

        session_id = str(uuid.uuid1())
        params = self.__gen_params(session_id)
        signature = self.__gen_signature(params)
        requrl = self.__create_query_string(params)
        requrl += "&Signature=%s" % quote(signature)

        self.ws = WebSocketApp(requrl, None,
                               on_error=_on_error, on_close=_on_close,
                               on_data=_on_data)
        self.ws.on_open = _on_open

        # Set STARTED before the thread runs; assigning it afterwards could
        # overwrite OPENED/ERROR/CLOSED already set by a callback.
        self.status = STARTED
        self.wst = threading.Thread(target=self.ws.run_forever)
        self.wst.daemon = True
        self.wst.start()
        self.listener.on_synthesis_start(session_id)

        logger.info("synthesizer start: end")

    def wait(self):
        """Block until the websocket thread has finished, if it is running."""
        logger.info("synthesizer wait: begin")
        if self.ws:
            if self.wst and self.wst.is_alive():
                self.wst.join()
        logger.info("synthesizer wait: end")
File without changes