smartpi 1.1.4__py3-none-any.whl → 1.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- smartpi/__init__.py +8 -0
- smartpi/_gui.pyc +0 -0
- smartpi/ai_asr.pyc +0 -0
- smartpi/ai_llm.pyc +0 -0
- smartpi/ai_tts.pyc +0 -0
- smartpi/ai_vad.pyc +0 -0
- smartpi/audio.pyc +0 -0
- smartpi/base_driver.pyc +0 -0
- smartpi/camera.pyc +0 -0
- smartpi/color_sensor.pyc +0 -0
- smartpi/cw2015.pyc +0 -0
- smartpi/flash.pyc +0 -0
- smartpi/humidity.pyc +0 -0
- smartpi/led.pyc +0 -0
- smartpi/light_sensor.pyc +0 -0
- smartpi/local_model.pyc +0 -0
- smartpi/mcp_client.pyc +0 -0
- smartpi/mcp_fastmcp.pyc +0 -0
- smartpi/mcp_intent_recognizer.pyc +0 -0
- smartpi/models/__init__.py +0 -0
- smartpi/models/snakers4_silero-vad/__init__.py +0 -0
- smartpi/models/snakers4_silero-vad/hubconf.py +56 -0
- smartpi/motor.pyc +0 -0
- smartpi/move.pyc +0 -0
- smartpi/onnx_hand_workflow.pyc +0 -0
- smartpi/onnx_image_workflow.pyc +0 -0
- smartpi/onnx_pose_workflow.pyc +0 -0
- smartpi/onnx_text_workflow.pyc +0 -0
- smartpi/onnx_voice_workflow.pyc +0 -0
- smartpi/posemodel/__init__.py +0 -0
- smartpi/posenet_utils.pyc +0 -0
- smartpi/rknn_hand_workflow.pyc +0 -0
- smartpi/rknn_image_workflow.pyc +0 -0
- smartpi/rknn_pose_workflow.pyc +0 -0
- smartpi/rknn_text_workflow.pyc +0 -0
- smartpi/rknn_voice_workflow.pyc +0 -0
- smartpi/servo.pyc +0 -0
- smartpi/temperature.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/__init__.py +1 -0
- smartpi/tencentcloud-speech-sdk-python/asr/__init__.py +0 -0
- smartpi/tencentcloud-speech-sdk-python/asr/flash_recognizer.py +178 -0
- smartpi/tencentcloud-speech-sdk-python/asr/speech_recognizer.py +311 -0
- smartpi/tencentcloud-speech-sdk-python/common/__init__.py +1 -0
- smartpi/tencentcloud-speech-sdk-python/common/credential.py +6 -0
- smartpi/tencentcloud-speech-sdk-python/common/log.py +16 -0
- smartpi/tencentcloud-speech-sdk-python/common/utils.py +7 -0
- smartpi/tencentcloud-speech-sdk-python/soe/__init__.py +0 -0
- smartpi/tencentcloud-speech-sdk-python/soe/speaking_assessment.py +276 -0
- smartpi/tencentcloud-speech-sdk-python/tts/__init__.py +0 -0
- smartpi/tencentcloud-speech-sdk-python/tts/flowing_speech_synthesizer.py +294 -0
- smartpi/tencentcloud-speech-sdk-python/tts/speech_synthesizer.py +144 -0
- smartpi/tencentcloud-speech-sdk-python/tts/speech_synthesizer_ws.py +234 -0
- smartpi/tencentcloud-speech-sdk-python/vc/__init__.py +0 -0
- smartpi/tencentcloud-speech-sdk-python/vc/speech_convertor_ws.py +237 -0
- smartpi/text_gte_model/__init__.py +0 -0
- smartpi/text_gte_model/config/__init__.py +0 -0
- smartpi/text_gte_model/gte/__init__.py +0 -0
- smartpi/touch_sensor.pyc +0 -0
- smartpi/trace.pyc +0 -0
- smartpi/ultrasonic.pyc +0 -0
- {smartpi-1.1.4.dist-info → smartpi-1.1.6.dist-info}/METADATA +1 -1
- smartpi-1.1.6.dist-info/RECORD +77 -0
- smartpi/__init__.pyc +0 -0
- smartpi/models/__init__.pyc +0 -0
- smartpi/models/snakers4_silero-vad/__init__.pyc +0 -0
- smartpi/models/snakers4_silero-vad/hubconf.pyc +0 -0
- smartpi/posemodel/__init__.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/__init__.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/asr/__init__.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/asr/flash_recognizer.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/asr/speech_recognizer.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/common/__init__.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/common/credential.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/common/log.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/common/utils.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/soe/__init__.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/soe/speaking_assessment.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/tts/__init__.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/tts/flowing_speech_synthesizer.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/tts/speech_synthesizer.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/tts/speech_synthesizer_ws.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/vc/__init__.pyc +0 -0
- smartpi/tencentcloud-speech-sdk-python/vc/speech_convertor_ws.pyc +0 -0
- smartpi/text_gte_model/__init__.pyc +0 -0
- smartpi/text_gte_model/config/__init__.pyc +0 -0
- smartpi/text_gte_model/gte/__init__.pyc +0 -0
- smartpi-1.1.4.dist-info/RECORD +0 -77
- {smartpi-1.1.4.dist-info → smartpi-1.1.6.dist-info}/WHEEL +0 -0
- {smartpi-1.1.4.dist-info → smartpi-1.1.6.dist-info}/top_level.txt +0 -0
smartpi/__init__.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
from ._gui import gui
|
|
2
|
+
from .base_driver import P1, P2, P3, P4, P5, P6, M1, M2, M3, M4, M5, M6
|
|
3
|
+
|
|
4
|
+
# Public names of the package. ``gui`` and the P1-P6 / M1-M6 names are
# imported at the top of this module; the other entries are not imported
# here -- presumably they name sibling submodules (verify against the
# package contents).
__all__ = [
    "base_driver",
    "gui",
    "audio",
    "ultrasonic",
    "touch_sensor",
    "temperature",
    "humidity",
    "light_sensor",
    "color_sensor",
    "motor",
    "servo",
    "led",
    "flash",
    "trace",
    "move",
    "local_model",
    "P1", "P2", "P3", "P4", "P5", "P6",
    "M1", "M2", "M3", "M4", "M5", "M6",
]

__version__ = "1.1.6"
|
|
8
|
+
|
smartpi/_gui.pyc
CHANGED
|
Binary file
|
smartpi/ai_asr.pyc
CHANGED
|
Binary file
|
smartpi/ai_llm.pyc
CHANGED
|
Binary file
|
smartpi/ai_tts.pyc
CHANGED
|
Binary file
|
smartpi/ai_vad.pyc
CHANGED
|
Binary file
|
smartpi/audio.pyc
CHANGED
|
Binary file
|
smartpi/base_driver.pyc
CHANGED
|
Binary file
|
smartpi/camera.pyc
CHANGED
|
Binary file
|
smartpi/color_sensor.pyc
CHANGED
|
Binary file
|
smartpi/cw2015.pyc
CHANGED
|
Binary file
|
smartpi/flash.pyc
CHANGED
|
Binary file
|
smartpi/humidity.pyc
CHANGED
|
Binary file
|
smartpi/led.pyc
CHANGED
|
Binary file
|
smartpi/light_sensor.pyc
CHANGED
|
Binary file
|
smartpi/local_model.pyc
CHANGED
|
Binary file
|
smartpi/mcp_client.pyc
CHANGED
|
Binary file
|
smartpi/mcp_fastmcp.pyc
CHANGED
|
Binary file
|
|
Binary file
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
dependencies = ['torch', 'torchaudio']
|
|
2
|
+
import torch
|
|
3
|
+
import os
|
|
4
|
+
import sys
|
|
5
|
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
|
|
6
|
+
from silero_vad.utils_vad import (init_jit_model,
|
|
7
|
+
get_speech_timestamps,
|
|
8
|
+
save_audio,
|
|
9
|
+
read_audio,
|
|
10
|
+
VADIterator,
|
|
11
|
+
collect_chunks,
|
|
12
|
+
OnnxWrapper)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def versiontuple(v):
    """Convert a dotted version string into a tuple of ints for comparison.

    Anything after the first ``+`` (a local version label such as ``+cu113``)
    is ignored. Each dot-separated component is converted with ``int``; a
    component that is not a plain integer contributes its leading decimal
    digits (``"13rc1"`` -> 13), or 0 when it has none (``"dev1"`` -> 0).

    Args:
        v: version string, e.g. ``"1.12.0+cu113"``.

    Returns:
        Tuple with one int per dot-separated component.
    """
    release = v.split('+')[0]
    version_list = []
    for part in release.split('.'):
        try:
            number = int(part)
        except ValueError:
            # Keep the numeric prefix so that e.g. "1.13rc1" is not treated
            # as "1.0" and wrongly rejected by a minimum-version check.
            digits = part[:len(part) - len(part.lstrip('0123456789'))]
            number = int(digits) if digits else 0
        version_list.append(number)
    return tuple(version_list)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def silero_vad(onnx=False, force_onnx_cpu=False, opset_version=16):
    """Silero Voice Activity Detector

    Returns a model with a set of utils
    Please see https://github.com/snakers4/silero-vad for usage examples

    Args:
        onnx: when true, load an ONNX model through ``OnnxWrapper``;
            otherwise load ``silero_vad.jit`` through ``init_jit_model``.
        force_onnx_cpu: passed to ``OnnxWrapper`` as its second argument;
            unused when ``onnx`` is false.
        opset_version: ONNX opset to load (15 or 16); only checked and used
            when ``onnx`` is true.

    Returns:
        ``(model, utils)`` where ``utils`` is the tuple
        ``(get_speech_timestamps, save_audio, read_audio, VADIterator,
        collect_chunks)``.

    Raises:
        ValueError: ``onnx`` is true and ``opset_version`` is not supported.
        RuntimeError: ``onnx`` is false and the installed torch is older
            than 1.12.0.
    """
    available_ops = [15, 16]
    if onnx and opset_version not in available_ops:
        # ValueError is an Exception subclass, so callers that catch
        # Exception keep working.
        raise ValueError(f'Available ONNX opset_version: {available_ops}')

    if not onnx:
        installed_version = torch.__version__
        supported_version = '1.12.0'
        if versiontuple(installed_version) < versiontuple(supported_version):
            raise RuntimeError(f'Please install torch {supported_version} or greater ({installed_version} installed)')

    # Model files are looked up next to this file, under src/silero_vad/data.
    model_dir = os.path.join(os.path.dirname(__file__), 'src', 'silero_vad', 'data')
    if onnx:
        if opset_version == 16:
            model_name = 'silero_vad.onnx'
        else:
            model_name = f'silero_vad_16k_op{opset_version}.onnx'
        model = OnnxWrapper(os.path.join(model_dir, model_name), force_onnx_cpu)
    else:
        model = init_jit_model(os.path.join(model_dir, 'silero_vad.jit'))

    utils = (get_speech_timestamps,
             save_audio,
             read_audio,
             VADIterator,
             collect_chunks)

    return model, utils
|
smartpi/motor.pyc
CHANGED
|
Binary file
|
smartpi/move.pyc
CHANGED
|
Binary file
|
smartpi/onnx_hand_workflow.pyc
CHANGED
|
Binary file
|
smartpi/onnx_image_workflow.pyc
CHANGED
|
Binary file
|
smartpi/onnx_pose_workflow.pyc
CHANGED
|
Binary file
|
smartpi/onnx_text_workflow.pyc
CHANGED
|
Binary file
|
smartpi/onnx_voice_workflow.pyc
CHANGED
|
Binary file
|
|
File without changes
|
smartpi/posenet_utils.pyc
CHANGED
|
Binary file
|
smartpi/rknn_hand_workflow.pyc
CHANGED
|
Binary file
|
smartpi/rknn_image_workflow.pyc
CHANGED
|
Binary file
|
smartpi/rknn_pose_workflow.pyc
CHANGED
|
Binary file
|
smartpi/rknn_text_workflow.pyc
CHANGED
|
Binary file
|
smartpi/rknn_voice_workflow.pyc
CHANGED
|
Binary file
|
smartpi/servo.pyc
CHANGED
|
Binary file
|
smartpi/temperature.pyc
CHANGED
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# -*- coding:utf-8 -*-
|
|
File without changes
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
import requests
|
|
3
|
+
import hmac
|
|
4
|
+
import hashlib
|
|
5
|
+
import base64
|
|
6
|
+
import time
|
|
7
|
+
import random
|
|
8
|
+
import os
|
|
9
|
+
import json
|
|
10
|
+
from common import credential
|
|
11
|
+
|
|
12
|
+
# Used by the flash (ultra-fast) recording recognition API
class FlashRecognitionRequest:
    """Option holder for a single flash recognition request.

    Only ``engine_type`` is given at construction time. Every other option
    starts from the default assigned in ``__init__`` and is changed through
    the ``set_*`` method of the same name.
    """

    def __init__(self, engine_type):
        self.engine_type = engine_type

        # Options whose initial value is the empty string.
        self.voice_format = ""
        self.hotword_id = ""
        self.hotword_list = ""
        self.customization_id = ""

        # Options whose initial value is 0.
        self.speaker_diarization = 0
        self.input_sample_rate = 0
        self.filter_dirty = 0
        self.filter_modal = 0
        self.filter_punc = 0
        self.word_info = 0
        self.reinforce_hotword = 0
        self.sentence_max_length = 0

        # Options whose initial value is 1.
        self.convert_num_mode = 1
        self.first_channel_only = 1

    def set_first_channel_only(self, first_channel_only):
        """Store ``first_channel_only`` on the request."""
        self.first_channel_only = first_channel_only

    def set_speaker_diarization(self, speaker_diarization):
        """Store ``speaker_diarization`` on the request."""
        self.speaker_diarization = speaker_diarization

    def set_filter_dirty(self, filter_dirty):
        """Store ``filter_dirty`` on the request."""
        self.filter_dirty = filter_dirty

    def set_filter_modal(self, filter_modal):
        """Store ``filter_modal`` on the request."""
        self.filter_modal = filter_modal

    def set_filter_punc(self, filter_punc):
        """Store ``filter_punc`` on the request."""
        self.filter_punc = filter_punc

    def set_convert_num_mode(self, convert_num_mode):
        """Store ``convert_num_mode`` on the request."""
        self.convert_num_mode = convert_num_mode

    def set_word_info(self, word_info):
        """Store ``word_info`` on the request."""
        self.word_info = word_info

    def set_hotword_id(self, hotword_id):
        """Store ``hotword_id`` on the request."""
        self.hotword_id = hotword_id

    def set_hotword_list(self, hotword_list):
        """Store ``hotword_list`` on the request."""
        self.hotword_list = hotword_list

    def set_input_sample_rate(self, input_sample_rate):
        """Store ``input_sample_rate`` on the request."""
        self.input_sample_rate = input_sample_rate

    def set_customization_id(self, customization_id):
        """Store ``customization_id`` on the request."""
        self.customization_id = customization_id

    def set_voice_format(self, voice_format):
        """Store ``voice_format`` on the request."""
        self.voice_format = voice_format

    def set_sentence_max_length(self, sentence_max_length):
        """Store ``sentence_max_length`` on the request."""
        self.sentence_max_length = sentence_max_length

    def set_reinforce_hotword(self, reinforce_hotword):
        """Store ``reinforce_hotword`` on the request."""
        self.reinforce_hotword = reinforce_hotword
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class FlashRecognizer:
    '''
    Client for the flash recognition endpoint
    https://asr.cloud.tencent.com/asr/flash/v1/<appid>.

    ``recognize`` returns the raw response text. Response layout as
    documented by the original authors:

    response:
    Field name            Type
    request_id            string
    status                Integer
    message               String
    audio_duration        Integer
    flash_result          Result Array

    The Result structure is:
    text                  String
    channel_id            Integer
    sentence_list         Sentence Array

    The Sentence structure is:
    text                  String
    start_time            Integer
    end_time              Integer
    speaker_id            Integer
    word_list             Word Array

    The Word type is:
    word                  String
    start_time            Integer
    end_time              Integer
    stable_flag:          Integer
    '''

    def __init__(self, appid, credential):
        # ``credential`` must expose ``secret_id`` and ``secret_key``.
        self.credential = credential
        self.appid = appid

    def _format_sign_string(self, param):
        """Build the string that is signed and later turned into the URL.

        Args:
            param: iterable of ``(key, value)`` pairs, already sorted.

        Returns:
            ``"POST" + host + path + <appid> + "?" + "k=v&k=v..."``. The
            ``appid`` pair supplies the path suffix and is left out of the
            query part; the ``?`` is omitted when no other pair exists.
        """
        signstr = "POSTasr.cloud.tencent.com/asr/flash/v1/"
        pairs = []
        appid_seen = False
        for item in param:
            # Compare the key only: a membership test on the whole tuple
            # would also match (and drop) a pair whose *value* is "appid".
            if item[0] == 'appid':
                if not appid_seen:
                    signstr += str(item[1])
                    appid_seen = True
                continue
            pairs.append("=".join([str(part) for part in item]))
        if pairs:
            signstr += "?" + "&".join(pairs)
        return signstr

    def _build_header(self):
        """Return the base request headers (only ``Host``)."""
        header = dict()
        header["Host"] = "asr.cloud.tencent.com"
        return header

    def _sign(self, signstr, secret_key):
        """Return the base64-encoded HMAC-SHA1 of ``signstr`` keyed by ``secret_key``."""
        hmacstr = hmac.new(secret_key.encode('utf-8'),
                           signstr.encode('utf-8'), hashlib.sha1).digest()
        return base64.b64encode(hmacstr).decode('utf-8')

    def _build_req_with_signature(self, secret_key, params, header):
        """Sign ``params`` and return the request URL.

        Side effect: stores the signature in ``header["Authorization"]``.
        """
        query = sorted(params.items(), key=lambda d: d[0])
        signstr = self._format_sign_string(query)
        header["Authorization"] = self._sign(signstr, secret_key)
        # The URL is the signed string with its leading "POST" removed.
        return "https://" + signstr[len("POST"):]

    def _create_query_arr(self, req):
        """Collect the query parameters for ``req`` into a dict.

        ``hotword_id``, ``hotword_list`` and ``input_sample_rate`` are only
        included when they differ from their empty defaults ("" / 0).
        """
        query_arr = dict()
        query_arr['appid'] = self.appid
        query_arr['secretid'] = self.credential.secret_id
        query_arr['timestamp'] = str(int(time.time()))
        query_arr['engine_type'] = req.engine_type
        query_arr['voice_format'] = req.voice_format
        query_arr['speaker_diarization'] = req.speaker_diarization
        if req.hotword_id != "":
            query_arr['hotword_id'] = req.hotword_id
        if req.hotword_list != "":
            query_arr['hotword_list'] = req.hotword_list
        if req.input_sample_rate != 0:
            query_arr['input_sample_rate'] = req.input_sample_rate
        query_arr['customization_id'] = req.customization_id
        query_arr['filter_dirty'] = req.filter_dirty
        query_arr['filter_modal'] = req.filter_modal
        query_arr['filter_punc'] = req.filter_punc
        query_arr['convert_num_mode'] = req.convert_num_mode
        query_arr['word_info'] = req.word_info
        query_arr['first_channel_only'] = req.first_channel_only
        query_arr['reinforce_hotword'] = req.reinforce_hotword
        query_arr['sentence_max_length'] = req.sentence_max_length
        return query_arr

    def recognize(self, req, data, timeout=None):
        """POST ``data`` for recognition and return the response body text.

        Args:
            req: request options (see ``_create_query_arr`` for the
                attributes that are read).
            data: request body handed to ``requests.post``.
            timeout: optional timeout forwarded to ``requests.post``. The
                default ``None`` keeps the previous behaviour of waiting
                indefinitely.

        Returns:
            ``r.text`` of the HTTP response; the status code is not checked.
        """
        header = self._build_header()
        query_arr = self._create_query_arr(req)
        req_url = self._build_req_with_signature(self.credential.secret_key, query_arr, header)
        r = requests.post(req_url, headers=header, data=data, timeout=timeout)
        return r.text
|
|
@@ -0,0 +1,311 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
import sys
|
|
3
|
+
import hmac
|
|
4
|
+
import hashlib
|
|
5
|
+
import base64
|
|
6
|
+
import time
|
|
7
|
+
import json
|
|
8
|
+
import threading
|
|
9
|
+
import websocket
|
|
10
|
+
import uuid
|
|
11
|
+
import urllib
|
|
12
|
+
from common.log import logger
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def is_python3():
    """Return True when the interpreter's major version is 3 or higher.

    Uses ``sys.version_info`` rather than comparing the ``sys.version``
    string, because a string comparison orders "10..." before "3".
    """
    return sys.version_info[0] >= 3
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# Used for real-time speech recognition
class SpeechRecognitionListener:
    '''
    Callback interface with no-op defaults; override the methods you need.

    response:
    The on_recognition_start response only has the voice_id field.
    on_fail only has the voice_id, code and message fields.
    on_recognition_complete has no result field.
    All other messages contain every field.
    Field name            Type
    code                  Integer
    message               String
    voice_id              String
    message_id            String
    result                Result
    final                 Integer

    The Result structure is:
    slice_type            Integer
    index                 Integer
    start_time            Integer
    end_time              Integer
    voice_text_str        String
    word_size             Integer
    word_list             Word Array

    The Word type is:
    word                  String
    start_time            Integer
    end_time              Integer
    stable_flag:          Integer
    '''

    def on_recognition_start(self, response):
        """No-op hook; receives the start ``response``."""

    def on_sentence_begin(self, response):
        """No-op hook; receives the sentence-begin ``response``."""

    def on_recognition_result_change(self, response):
        """No-op hook; receives the result-change ``response``."""

    def on_sentence_end(self, response):
        """No-op hook; receives the sentence-end ``response``."""

    def on_recognition_complete(self, response):
        """No-op hook; receives the completion ``response``."""

    def on_fail(self, response):
        """No-op hook; receives the failure ``response``."""
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
# Values taken by ``SpeechRecognizer.status``:
#   NOTOPEN - initial value set in __init__
#   STARTED - set by start() once the websocket thread has been launched
#   OPENED  - set by the websocket on_open callback
#   FINAL   - set when a message with final == 1 arrives
#   ERROR   - set by the websocket on_error callback
#   CLOSED  - set by the websocket on_close callback
NOTOPEN, STARTED, OPENED = 0, 1, 2
FINAL, ERROR, CLOSED = 3, 4, 5
|
|
78
|
+
|
|
79
|
+
# Used for real-time speech recognition
class SpeechRecognizer:
    """Streaming recognizer over ``wss://asr.cloud.tencent.com/asr/v2/<appid>``.

    Typical use visible in this class: configure with the ``set_*`` methods,
    call ``start()`` to open the websocket on a background thread, feed
    audio with ``write()``, then call ``stop()``. Server messages are
    dispatched to the ``listener`` callbacks.
    """

    def __init__(self, appid, credential, engine_model_type, listener):
        self.result = ""
        # ``credential`` must expose ``secret_id`` and ``secret_key``.
        self.credential = credential
        self.appid = appid
        self.engine_model_type = engine_model_type
        self.status = NOTOPEN
        self.ws = None   # websocket.WebSocketApp, created by start()
        self.wst = None  # thread running ws.run_forever, created by start()
        self.voice_id = ""
        self.new_start = 0
        self.listener = listener
        self.filter_dirty = 0
        self.filter_modal = 0
        self.filter_punc = 0
        self.convert_num_mode = 0
        self.word_info = 0
        self.need_vad = 0
        self.vad_silence_time = 0
        self.hotword_id = ""
        self.hotword_list = ""
        self.reinforce_hotword = 0
        self.noise_threshold = 0
        self.voice_format = 4
        self.nonce = ""
        self.replace_text_id = ""

    def set_filter_dirty(self, filter_dirty):
        self.filter_dirty = filter_dirty

    def set_filter_modal(self, filter_modal):
        self.filter_modal = filter_modal

    def set_filter_punc(self, filter_punc):
        self.filter_punc = filter_punc

    def set_convert_num_mode(self, convert_num_mode):
        self.convert_num_mode = convert_num_mode

    def set_word_info(self, word_info):
        self.word_info = word_info

    def set_need_vad(self, need_vad):
        self.need_vad = need_vad

    def set_vad_silence_time(self, vad_silence_time):
        self.vad_silence_time = vad_silence_time

    def set_hotword_id(self, hotword_id):
        self.hotword_id = hotword_id

    def set_hotword_list(self, hotword_list):
        self.hotword_list = hotword_list

    def set_voice_format(self, voice_format):
        self.voice_format = voice_format

    def set_nonce(self, nonce):
        self.nonce = nonce

    def set_reinforce_hotword(self, reinforce_hotword):
        self.reinforce_hotword = reinforce_hotword

    def set_noise_threshold(self, noise_threshold):
        self.noise_threshold = noise_threshold

    def set_replace_text_id(self, replace_text_id):
        self.replace_text_id = replace_text_id

    def _join_query(self, prefix, param):
        """Return ``prefix + <appid> + "?" + "k=v&k=v..."`` for sorted pairs.

        The ``appid`` pair supplies the path suffix and is excluded from the
        query part; the ``?`` is omitted when there is no other pair.
        """
        result = prefix
        pairs = []
        appid_seen = False
        for item in param:
            # Compare the key only: a membership test on the whole tuple
            # would also match (and drop) a pair whose *value* is "appid".
            if item[0] == 'appid':
                if not appid_seen:
                    result += str(item[1])
                    appid_seen = True
                continue
            pairs.append("=".join([str(part) for part in item]))
        if pairs:
            result += "?" + "&".join(pairs)
        return result

    def format_sign_string(self, param):
        """Build the scheme-less string that gets signed."""
        return self._join_query("asr.cloud.tencent.com/asr/v2/", param)

    def create_query_string(self, param):
        """Build the ``wss://`` request URL (without the signature)."""
        return self._join_query("wss://asr.cloud.tencent.com/asr/v2/", param)

    def sign(self, signstr, secret_key):
        """Return the base64-encoded HMAC-SHA1 of ``signstr`` keyed by ``secret_key``."""
        hmacstr = hmac.new(secret_key.encode('utf-8'),
                           signstr.encode('utf-8'), hashlib.sha1).digest()
        return base64.b64encode(hmacstr).decode('utf-8')

    def create_query_arr(self):
        """Collect the connection parameters into a dict.

        Optional values (``vad_silence_time``, ``hotword_id``,
        ``hotword_list``, ``replace_text_id``) are only included when they
        differ from their empty defaults. ``nonce`` falls back to the
        timestamp string; ``expired`` is now + 24 hours.
        """
        query_arr = dict()

        query_arr['appid'] = self.appid
        query_arr['sub_service_type'] = 1
        query_arr['engine_model_type'] = self.engine_model_type
        query_arr['filter_dirty'] = self.filter_dirty
        query_arr['filter_modal'] = self.filter_modal
        query_arr['filter_punc'] = self.filter_punc
        query_arr['needvad'] = self.need_vad
        query_arr['convert_num_mode'] = self.convert_num_mode
        query_arr['word_info'] = self.word_info
        if self.vad_silence_time != 0:
            query_arr['vad_silence_time'] = self.vad_silence_time
        if self.hotword_id != "":
            query_arr['hotword_id'] = self.hotword_id
        if self.hotword_list != "":
            query_arr['hotword_list'] = self.hotword_list
        if self.replace_text_id != "":
            query_arr['replace_text_id'] = self.replace_text_id
        query_arr['secretid'] = self.credential.secret_id
        query_arr['voice_format'] = self.voice_format
        query_arr['voice_id'] = self.voice_id
        query_arr['timestamp'] = str(int(time.time()))
        if self.nonce != "":
            query_arr['nonce'] = self.nonce
        else:
            query_arr['nonce'] = query_arr['timestamp']
        query_arr['expired'] = int(time.time()) + 24 * 60 * 60
        query_arr['reinforce_hotword'] = self.reinforce_hotword
        query_arr['noise_threshold'] = self.noise_threshold
        return query_arr

    def stop(self):
        """Send the ``{"type": "end"}`` message (if open) and shut down.

        When a websocket exists, the worker thread is joined first and the
        socket is closed afterwards.
        """
        if self.status == OPENED:
            msg = {}
            msg['type'] = "end"
            text_str = json.dumps(msg)
            self.ws.sock.send(text_str)
        if self.ws:
            if self.wst and self.wst.is_alive():
                self.wst.join()
            self.ws.close()

    def write(self, data):
        """Send one binary audio chunk.

        Polls every 0.1 s while the connection is still in the STARTED
        state; the chunk is sent only if the state is then OPENED and is
        dropped otherwise.
        """
        while self.status == STARTED:
            time.sleep(0.1)
        if self.status == OPENED:
            self.ws.sock.send_binary(data)

    def start(self):
        """Open the websocket on a daemon thread and notify the listener."""

        def on_message(ws, message):
            response = json.loads(message)
            response['voice_id'] = self.voice_id
            if response['code'] != 0:
                logger.error("%s server recognition fail %s",
                             response['voice_id'], response['message'])
                self.listener.on_fail(response)
                return
            if "final" in response and response["final"] == 1:
                self.status = FINAL
                self.result = message
                self.listener.on_recognition_complete(response)
                logger.info("%s recognition complete", response['voice_id'])
                return
            if "result" in response.keys():
                # slice_type: 0 -> sentence begin, 1 -> result change,
                # 2 -> sentence end (per the dispatch below).
                if response["result"]['slice_type'] == 0:
                    self.listener.on_sentence_begin(response)
                    return
                elif response["result"]["slice_type"] == 2:
                    self.listener.on_sentence_end(response)
                    return
                elif response["result"]["slice_type"] == 1:
                    self.listener.on_recognition_result_change(response)
                    return

        def on_error(ws, error):
            # Errors after the final result are ignored.
            if self.status == FINAL:
                return
            logger.error("websocket error %s voice id %s",
                         format(error), self.voice_id)
            self.status = ERROR

        def on_close(ws, *args):
            # Newer websocket-client releases also pass close_status_code
            # and close_msg; accept and ignore any extra arguments.
            self.status = CLOSED
            logger.info("websocket closed voice id %s", self.voice_id)

        def on_open(ws):
            self.status = OPENED

        query_arr = self.create_query_arr()
        if self.voice_id == "":
            query_arr['voice_id'] = str(uuid.uuid1())
            self.voice_id = query_arr['voice_id']
        query = sorted(query_arr.items(), key=lambda d: d[0])
        signstr = self.format_sign_string(query)

        autho = self.sign(signstr, self.credential.secret_key)
        requrl = self.create_query_string(query)
        if is_python3():
            autho = urllib.parse.quote(autho)
        else:
            autho = urllib.quote(autho)
        requrl += "&signature=%s" % autho
        self.ws = websocket.WebSocketApp(requrl, None,
                                         on_error=on_error, on_close=on_close, on_message=on_message)
        self.ws.on_open = on_open
        self.wst = threading.Thread(target=self.ws.run_forever)
        self.wst.daemon = True
        # Mark STARTED before launching the thread so a fast on_open/on_error
        # callback cannot be overwritten afterwards (which would leave
        # write() polling forever).
        self.status = STARTED
        self.wst.start()
        response = {}
        response['voice_id'] = self.voice_id
        self.listener.on_recognition_start(response)
        logger.info("%s recognition start", response['voice_id'])
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
#!#-*-coding:utf-8 -*-
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import logging.handlers
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
# Record layout shared by the root configuration and the file handler.
FORMAT = '%(asctime)15s %(name)s-%(levelname)s %(funcName)s:%(lineno)s %(message)s'

# Import-time side effect: configures the root logger at DEBUG level.
logging.basicConfig(format=FORMAT, level=logging.DEBUG)

# SDK logger; it only emits records at INFO and above.
logger = logging.getLogger('tencent_speech.log')
logger.setLevel(logging.INFO)

# Rotating log file in the current working directory: 1 MiB per file,
# five backups kept.
handler = logging.handlers.RotatingFileHandler(
    filename='tencent_speech.log',
    maxBytes=1024 * 1024,
    backupCount=5,
    encoding='utf-8',
)
handler.setFormatter(logging.Formatter(FORMAT))
handler.setLevel(logging.DEBUG)
logger.addHandler(handler)
|
|
16
|
+
|