smartpi 0.1.40__py3-none-any.whl → 0.1.41__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. smartpi/__init__.py +1 -1
  2. smartpi/ai_asr.py +1036 -0
  3. smartpi/ai_llm.py +934 -0
  4. smartpi/ai_tts.py +938 -0
  5. smartpi/ai_vad.py +199 -0
  6. smartpi/base_driver.py +265 -11
  7. smartpi/local_model.py +432 -0
  8. smartpi/mcp_client.py +100 -0
  9. smartpi/mcp_fastmcp.py +322 -0
  10. smartpi/mcp_intent_recognizer.py +408 -0
  11. smartpi/models/__init__.py +0 -0
  12. smartpi/models/snakers4_silero-vad/__init__.py +0 -0
  13. smartpi/models/snakers4_silero-vad/hubconf.py +56 -0
  14. smartpi/models/snakers4_silero-vad/src/silero_vad/data/silero_vad.jit +0 -0
  15. smartpi/models/snakers4_silero-vad/src/silero_vad/data/silero_vad.onnx +0 -0
  16. smartpi/models/snakers4_silero-vad/src/silero_vad/data/silero_vad_16k_op15.onnx +0 -0
  17. smartpi/models/snakers4_silero-vad/src/silero_vad/data/silero_vad_half.onnx +0 -0
  18. smartpi/tencentcloud-speech-sdk-python/__init__.py +1 -0
  19. smartpi/tencentcloud-speech-sdk-python/asr/__init__.py +0 -0
  20. smartpi/tencentcloud-speech-sdk-python/asr/flash_recognizer.py +178 -0
  21. smartpi/tencentcloud-speech-sdk-python/asr/speech_recognizer.py +311 -0
  22. smartpi/tencentcloud-speech-sdk-python/common/__init__.py +1 -0
  23. smartpi/tencentcloud-speech-sdk-python/common/credential.py +6 -0
  24. smartpi/tencentcloud-speech-sdk-python/common/log.py +16 -0
  25. smartpi/tencentcloud-speech-sdk-python/common/utils.py +7 -0
  26. smartpi/tencentcloud-speech-sdk-python/examples/tts/tts_text.txt +60 -0
  27. smartpi/tencentcloud-speech-sdk-python/soe/__init__.py +0 -0
  28. smartpi/tencentcloud-speech-sdk-python/soe/speaking_assessment.py +276 -0
  29. smartpi/tencentcloud-speech-sdk-python/tts/__init__.py +0 -0
  30. smartpi/tencentcloud-speech-sdk-python/tts/flowing_speech_synthesizer.py +294 -0
  31. smartpi/tencentcloud-speech-sdk-python/tts/speech_synthesizer.py +144 -0
  32. smartpi/tencentcloud-speech-sdk-python/tts/speech_synthesizer_ws.py +234 -0
  33. smartpi/tencentcloud-speech-sdk-python/vc/__init__.py +0 -0
  34. smartpi/tencentcloud-speech-sdk-python/vc/speech_convertor_ws.py +237 -0
  35. {smartpi-0.1.40.dist-info → smartpi-0.1.41.dist-info}/METADATA +1 -1
  36. smartpi-0.1.41.dist-info/RECORD +76 -0
  37. smartpi-0.1.40.dist-info/RECORD +0 -44
  38. {smartpi-0.1.40.dist-info → smartpi-0.1.41.dist-info}/WHEEL +0 -0
  39. {smartpi-0.1.40.dist-info → smartpi-0.1.41.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,408 @@
+ # -*- coding: utf-8 -*-
+ """MCP-optimized intent recognizer."""
+ import asyncio
+ import json
+ import re
+ from typing import List, Dict, Any
+ from .mcp_client import MCPClient
+ import logging
+
+ # Configure logging
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+ logger = logging.getLogger(__name__)
+
+
+ class MCPIntentRecognizer:
+     """MCP-based intent recognizer."""
+
+     def __init__(self, global_intents=None):
+         self.mcp_client = None
+         self.is_connected = False
+         self.server_url = "http://127.0.0.1:8000/mcp"
+         self.intent_tool_name = "recognize_intent"
+         self.initialize_task = None
+         # Initialization flag to avoid repeated initialization
+         self._initializing = False
+         # Store the global intent mapping
+         self.global_intents = global_intents if global_intents is not None else {}
+
+         # Build the mapping from English codes to numeric intent codes
+         self.en_code_to_intent_code = {}
+         for intent_code, intent_info in self.global_intents.items():
+             if "en_code" in intent_info:
+                 en_code = intent_info["en_code"]
+                 # Support a single English code or a list of English codes
+                 if isinstance(en_code, list):
+                     for code in en_code:
+                         self.en_code_to_intent_code[code.lower()] = intent_code
+                 else:
+                     self.en_code_to_intent_code[en_code.lower()] = intent_code
+
+         logger.info(f"MCPIntentRecognizer initialized; global intents: {len(self.global_intents)}, English-code mappings: {len(self.en_code_to_intent_code)}")
+
+     def _get_intent_code_from_en_code(self, en_code):
+         """
+         Convert an English intent code to its numeric code.
+
+         Args:
+             en_code: English intent code
+
+         Returns:
+             int: the corresponding numeric code, or None if not found
+         """
+         if not en_code or not isinstance(en_code, str):
+             return None
+
+         # Look up the lowercased code
+         return self.en_code_to_intent_code.get(en_code.lower())
+
+     def update_en_code_mapping(self):
+         """
+         Rebuild the mapping from English codes to numeric codes.
+         Call this whenever the global intents have been updated.
+         """
+         # Clear the old mapping
+         self.en_code_to_intent_code.clear()
+
+         # Rebuild the mapping
+         for intent_code, intent_info in self.global_intents.items():
+             if "en_code" in intent_info:
+                 en_code = intent_info["en_code"]
+                 # Support a single English code or a list of English codes
+                 if isinstance(en_code, list):
+                     for code in en_code:
+                         self.en_code_to_intent_code[code.lower()] = intent_code
+                 else:
+                     self.en_code_to_intent_code[en_code.lower()] = intent_code
+
+         logger.info(f"English-code mapping updated; current mapping count: {len(self.en_code_to_intent_code)}")
+
+     async def initialize(self):
+         """Initialize the MCP client and connect to the server."""
+         # Avoid repeated initialization
+         if self.is_connected or self._initializing:
+             return self.is_connected
+
+         self._initializing = True
+
+         try:
+             self.mcp_client = MCPClient()
+             result = await self.mcp_client.connect(self.server_url)
+             if result:
+                 logger.info("Connected to the MCP server")
+                 self.is_connected = True
+                 return True
+             else:
+                 logger.warning("Connecting to the MCP server did not succeed")
+                 self.is_connected = False
+                 return False
+         except Exception as e:
+             logger.error(f"Failed to connect to the MCP server: {str(e)}")
+             self.is_connected = False
+             return False
+         finally:
+             self._initializing = False
+
+     async def recognize_intent_async(self, user_input: str) -> List[Dict[str, Any]]:
+         """Asynchronously recognize intents in the user input."""
+         # Make sure we are connected
+         if not self.is_connected:
+             await self.initialize()
+             if not self.is_connected:
+                 logger.error("Unable to connect to the MCP server")
+                 return []
+
+         try:
+             # Build the tool-call request
+             tool_call = {
+                 "id": "1",
+                 "type": "function",
+                 "function": {
+                     "name": self.intent_tool_name,
+                     "arguments": json.dumps({"user_input": user_input})
+                 }
+             }
+
+             # Call the MCP tool
+             results = await self.mcp_client.call_tool([tool_call])
+
+             # Parse the results
+             if results:
+                 # Walk through all results and find the tool-role responses
+                 for tool_result in results:
+                     if tool_result.get("role") == "tool":
+                         intent_result = tool_result.get("content", "[]")
+                         logger.info(f"Intent result received from MCP: {intent_result}")
+                         parsed_result = self._parse_intent_result(intent_result)
+
+                         # Enhanced parameterized intent recognition: try to extract parameters
+                         # whether or not MCP returned a result
+                         enhanced_result = []
+
+                         # 1. If MCP returned intents, add parameters and names to each intent
+                         if parsed_result:
+                             for intent_data in parsed_result:
+                                 # If there are no parameters, try to extract them, but only for intents that need them
+                                 if 'arg' not in intent_data or not intent_data['arg']:
+                                     intent_code = intent_data['intent']
+                                     # Only specific intent types need parameters (e.g. volume or speed adjustment).
+                                     # This could be decided from the intent configuration or from the intent code directly;
+                                     # here we assume only a few intents need parameters and most do not.
+                                     # Intents that take parameters, e.g. volume (2) and speed (7)
+                                     parameter_intents = ['2', '7']  # Extend as needed
+                                     if intent_code in parameter_intents:
+                                         intent_data['arg'] = self._extract_parameters(user_input, intent_code)
+                                     else:
+                                         # Intents without parameters keep an empty list
+                                         intent_data['arg'] = []
+
+                                 enhanced_result.append(intent_data)
+                         else:
+                             # 2. If MCP returned no intents, try to recognize common intents
+                             #    (such as volume adjustment) directly from the text
+                             enhanced_result = self._direct_extract_intents(user_input)
+
+                         logger.info(f"Enhanced intent recognition result: {enhanced_result}")
+                         return enhanced_result
+         except Exception as e:
+             logger.error(f"Intent recognition via MCP failed: {str(e)}")
+
+         # If an exception occurred, try to recognize intents directly from the text
+         return self._direct_extract_intents(user_input)
+
+     def _extract_parameters(self, text, intent):
+         """Extract parameters from the text."""
+         text_lower = text.lower()
+
+         # Try to extract numeric parameters, including percentage-style values
+         numbers = re.findall(r'\d+(?:\.\d+)?', text_lower)
+         if numbers:
+             return [numbers[0]]  # Return the first matching number
+
+         return []
+
+     def _direct_extract_intents(self, user_input):
+         """Extract common intents directly from the text, especially parameterized ones."""
+         text_lower = user_input.lower()
+         result = []
+
+         # Use the global intent configuration passed in from ai_llm.py, so intents are managed centrally
+         for intent_code, intent_info in self.global_intents.items():
+             # Skip intents that have already been recognized (avoid duplicates)
+             intent_already_identified = any(item["intent"] == intent_code for item in result)
+             if intent_already_identified:
+                 continue
+
+             # Check the keywords
+             for keyword in intent_info["keywords"]:
+                 if keyword in text_lower:
+                     # Extract parameters
+                     args = self._extract_parameters(text_lower, intent_code)
+                     result.append({
+                         "intent": intent_code,
+                         "arg": args
+                     })
+                     break
+
+         return result
+
+     def recognize_intent(self, user_input: str) -> List[Dict[str, Any]]:
+         """Synchronously recognize intents in the user input (for non-async callers). Uses MCP only, with no fallback mechanism."""
+         try:
+             # Extra logging
+             logger.info(f"Starting intent recognition: {user_input}")
+
+             # Reuse the existing event loop if there is one
+             try:
+                 loop = asyncio.get_event_loop()
+                 if loop.is_closed():
+                     raise RuntimeError("Event loop is closed")
+             except (RuntimeError, AssertionError):
+                 # Create a new event loop
+                 loop = asyncio.new_event_loop()
+                 asyncio.set_event_loop(loop)
+
+             # Try to recognize via MCP
+             result = loop.run_until_complete(self.recognize_intent_async(user_input))
+             logger.info(f"Intent recognition finished, result: {result}")
+             return result
+         except Exception as e:
+             logger.error(f"Synchronous MCP intent-recognition call failed: {str(e)}")
+             return []
+
+     def _parse_intent_result(self, intent_text: str) -> List[Dict[str, Any]]:
+         """Parse the intent recognition result; kept consistent with FastIntentRecognizer."""
+         try:
+             # Handle possible formatting issues
+             if not intent_text or intent_text.strip() == "[]":
+                 return []
+
+             # Step 1: make sure all double braces are replaced with single braces
+             intent_text = intent_text.replace("{{", "{").replace("}}", "}")
+
+             # Step 2: try several parsing strategies
+             # Strategy 1: try to parse as a single JSON object
+             try:
+                 if intent_text.strip().startswith("{"):
+                     intent_data = json.loads(intent_text)
+                     # Keep the returned format consistent with FastIntentRecognizer
+                     if "intent" in intent_data:
+                         return [intent_data]
+                     # If the field is named intent_code, convert it to intent
+                     elif "intent_code" in intent_data:
+                         converted = {"intent": intent_data["intent_code"]}
+                         if "arg" in intent_data:
+                             converted["arg"] = intent_data["arg"]
+                         return [converted]
+             except Exception as e1:
+                 logger.warning(f"Strategy 1 parsing failed: {str(e1)}")
+
+             # Strategy 2: try to parse as a JSON array
+             try:
+                 if not intent_text.strip().startswith("["):
+                     formatted_text = "[" + intent_text + "]"
+                     intents = json.loads(formatted_text)
+                 else:
+                     intents = json.loads(intent_text)
+
+                 # Keep the returned format consistent with FastIntentRecognizer
+                 if isinstance(intents, list):
+                     result = []
+                     for intent in intents:
+                         if isinstance(intent, dict):
+                             if "intent" in intent:
+                                 result.append(intent)
+                             elif "intent_code" in intent:
+                                 converted = {"intent": intent["intent_code"]}
+                                 if "arg" in intent:
+                                     converted["arg"] = intent["arg"]
+                                 result.append(converted)
+                     return result if result else intents
+                 elif isinstance(intents, dict):
+                     if "intent" in intents:
+                         return [intents]
+                     elif "intent_code" in intents:
+                         converted = {"intent": intents["intent_code"]}
+                         if "arg" in intents:
+                             converted["arg"] = intents["arg"]
+                         return [converted]
+                     return [intents]
+             except Exception as e2:
+                 logger.warning(f"Strategy 2 parsing failed: {str(e2)}")
+
+             # Strategy 3: handle results that may contain several intents
+             try:
+                 # Remove all whitespace
+                 clean_text = ''.join(intent_text.split())
+
+                 if ",{" in clean_text:
+                     # Split into individual intents
+                     intent_parts = clean_text.split(",")
+                     intents = []
+                     for i, part in enumerate(intent_parts):
+                         try:
+                             # Make sure each part is a valid JSON object
+                             if not part.startswith("{"):
+                                 part = "{" + part
+                             if not part.endswith("}"):
+                                 part = part + "}"
+                             intent_data = json.loads(part)
+                             # Convert the format
+                             if "intent" in intent_data:
+                                 intents.append(intent_data)
+                             elif "intent_code" in intent_data:
+                                 converted = {"intent": intent_data["intent_code"]}
+                                 if "arg" in intent_data:
+                                     converted["arg"] = intent_data["arg"]
+                                 intents.append(converted)
+                         except Exception as e_inner:
+                             logger.warning(f"Failed to parse intent {i+1}: {str(e_inner)}")
+                             pass
+                     if intents:
+                         return intents
+             except Exception as e3:
+                 logger.warning(f"Strategy 3 parsing failed: {str(e3)}")
+
+             # Strategy 4: try to repair the format and parse again
+             try:
+                 # Remove all whitespace
+                 clean_text = ''.join(intent_text.split())
+
+                 # Make sure it looks like an object or an array
+                 if not clean_text.startswith("{") and not clean_text.startswith("["):
+                     # Try adding object braces
+                     if clean_text.startswith("intent") or ":" in clean_text:
+                         clean_text = "{" + clean_text + "}"
+                         intent_data = json.loads(clean_text)
+                         # Convert the format
+                         if "intent" in intent_data:
+                             return [intent_data]
+                         elif "intent_code" in intent_data:
+                             intent_code_value = intent_data["intent_code"]
+                             if isinstance(intent_code_value, str) and intent_code_value.isalpha():
+                                 numeric_code = self._get_intent_code_from_en_code(intent_code_value)
+                                 if numeric_code:
+                                     intent_code_value = numeric_code
+                             converted = {"intent": intent_code_value}
+                             if "arg" in intent_data:
+                                 converted["arg"] = intent_data["arg"]
+                             return [converted]
+                 else:
+                     # Try parsing once more
+                     intent_data = json.loads(clean_text)
+                     if isinstance(intent_data, list):
+                         # Check every intent in the list
+                         for item in intent_data:
+                             if isinstance(item, dict) and "intent" in item:
+                                 intent_value = item["intent"]
+                                 if isinstance(intent_value, str) and intent_value.isalpha():
+                                     numeric_code = self._get_intent_code_from_en_code(intent_value)
+                                     if numeric_code:
+                                         item["intent"] = numeric_code
+                         return intent_data
+                     else:
+                         if isinstance(intent_data, dict) and "intent" in intent_data:
+                             # If intent is an English code, convert it to a numeric code
+                             intent_value = intent_data["intent"]
+                             if isinstance(intent_value, str) and intent_value.isalpha():
+                                 numeric_code = self._get_intent_code_from_en_code(intent_value)
+                                 if numeric_code:
+                                     intent_data["intent"] = numeric_code
+                             return [intent_data]
+             except Exception as e4:
+                 logger.warning(f"Strategy 4 parsing failed: {str(e4)}")
+
+             # All strategies failed
+             logger.error("All parsing attempts failed; returning an empty list")
+             return []
+         except Exception as e:
+             logger.error(f"Fatal error while parsing the intent result: {str(e)}")
+             return []
+
+     async def close(self):
+         """Close the MCP connection."""
+         if self.mcp_client:
+             try:
+                 # Prefer closing via exit_stack
+                 if hasattr(self.mcp_client, 'exit_stack'):
+                     await self.mcp_client.exit_stack.aclose()
+                 # Otherwise call close() if it is available
+                 elif hasattr(self.mcp_client, 'close'):
+                     await self.mcp_client.close()
+                 self.is_connected = False
+                 logger.info("MCP connection closed")
+             except Exception as e:
+                 logger.error(f"Error while closing the MCP connection: {str(e)}")
+
+
+ # Global instance, initialized lazily
+ global_mcp_intent_recognizer = None
+
+
+ def get_mcp_intent_recognizer():
+     global global_mcp_intent_recognizer
+     if global_mcp_intent_recognizer is None:
+         global_mcp_intent_recognizer = MCPIntentRecognizer()
+     return global_mcp_intent_recognizer
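For context, a minimal usage sketch of the new MCP-backed recognizer follows. It is not part of the diff: it assumes an MCP server exposing a `recognize_intent` tool is already running at the default `http://127.0.0.1:8000/mcp`, and the `global_intents` mapping shown here is illustrative.

```python
# Hypothetical usage sketch of smartpi.mcp_intent_recognizer (not part of the diff).
# Assumes an MCP server exposing a "recognize_intent" tool at the default
# http://127.0.0.1:8000/mcp; the global_intents mapping is illustrative.
from smartpi.mcp_intent_recognizer import MCPIntentRecognizer

global_intents = {
    "2": {"en_code": "volume_adjust", "keywords": ["volume", "louder"]},  # parameterized intent
    "7": {"en_code": "speed_adjust", "keywords": ["speed", "faster"]},
}

recognizer = MCPIntentRecognizer(global_intents=global_intents)

# The synchronous wrapper drives recognize_intent_async() on an event loop and
# falls back to keyword matching over global_intents if the MCP call fails.
intents = recognizer.recognize_intent("turn the volume up to 80")
print(intents)  # e.g. [{"intent": "2", "arg": ["80"]}]
```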
File without changes
File without changes
@@ -0,0 +1,56 @@
+ dependencies = ['torch', 'torchaudio']
+ import torch
+ import os
+ import sys
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'src'))
+ from silero_vad.utils_vad import (init_jit_model,
+                                   get_speech_timestamps,
+                                   save_audio,
+                                   read_audio,
+                                   VADIterator,
+                                   collect_chunks,
+                                   OnnxWrapper)
+
+
+ def versiontuple(v):
+     splitted = v.split('+')[0].split(".")
+     version_list = []
+     for i in splitted:
+         try:
+             version_list.append(int(i))
+         except:
+             version_list.append(0)
+     return tuple(version_list)
+
+
+ def silero_vad(onnx=False, force_onnx_cpu=False, opset_version=16):
+     """Silero Voice Activity Detector
+     Returns a model with a set of utils
+     Please see https://github.com/snakers4/silero-vad for usage examples
+     """
+     available_ops = [15, 16]
+     if onnx and opset_version not in available_ops:
+         raise Exception(f'Available ONNX opset_version: {available_ops}')
+
+     if not onnx:
+         installed_version = torch.__version__
+         supported_version = '1.12.0'
+         if versiontuple(installed_version) < versiontuple(supported_version):
+             raise Exception(f'Please install torch {supported_version} or greater ({installed_version} installed)')
+
+     model_dir = os.path.join(os.path.dirname(__file__), 'src', 'silero_vad', 'data')
+     if onnx:
+         if opset_version == 16:
+             model_name = 'silero_vad.onnx'
+         else:
+             model_name = f'silero_vad_16k_op{opset_version}.onnx'
+         model = OnnxWrapper(os.path.join(model_dir, model_name), force_onnx_cpu)
+     else:
+         model = init_jit_model(os.path.join(model_dir, 'silero_vad.jit'))
+     utils = (get_speech_timestamps,
+              save_audio,
+              read_audio,
+              VADIterator,
+              collect_chunks)
+
+     return model, utils
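This hubconf.py mirrors the upstream snakers4/silero-vad hub entry point, so the bundled model could in principle be loaded locally through torch.hub. A hedged sketch, assuming torch >= 1.12 is installed and that the vendored directory inside smartpi is complete; the audio file name is a placeholder:

```python
# Hypothetical loading sketch for the bundled silero-vad model (not part of the diff).
# Assumes torch >= 1.12; the repo directory and audio file are placeholders.
import os
import torch
import smartpi

repo_dir = os.path.join(os.path.dirname(smartpi.__file__), "models", "snakers4_silero-vad")
model, utils = torch.hub.load(repo_or_dir=repo_dir, model="silero_vad", source="local")
get_speech_timestamps, save_audio, read_audio, VADIterator, collect_chunks = utils

wav = read_audio("example.wav", sampling_rate=16000)
speech_timestamps = get_speech_timestamps(wav, model, sampling_rate=16000)
print(speech_timestamps)
```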
@@ -0,0 +1 @@
+ # -*- coding:utf-8 -*-
File without changes
@@ -0,0 +1,178 @@
+ # -*- coding: utf-8 -*-
+ import requests
+ import hmac
+ import hashlib
+ import base64
+ import time
+ import random
+ import os
+ import json
+ from common import credential
+
+ # Used by the flash (one-shot) recording recognition service
+ class FlashRecognitionRequest:
+     def __init__(self, engine_type):
+         self.engine_type = engine_type
+         self.speaker_diarization = 0
+         self.hotword_id = ""
+         self.hotword_list = ""
+         self.input_sample_rate = 0
+         self.customization_id = ""
+         self.filter_dirty = 0
+         self.filter_modal = 0
+         self.filter_punc = 0
+         self.convert_num_mode = 1
+         self.word_info = 0
+         self.voice_format = ""
+         self.first_channel_only = 1
+         self.reinforce_hotword = 0
+         self.sentence_max_length = 0
+
+     def set_first_channel_only(self, first_channel_only):
+         self.first_channel_only = first_channel_only
+
+     def set_speaker_diarization(self, speaker_diarization):
+         self.speaker_diarization = speaker_diarization
+
+     def set_filter_dirty(self, filter_dirty):
+         self.filter_dirty = filter_dirty
+
+     def set_filter_modal(self, filter_modal):
+         self.filter_modal = filter_modal
+
+     def set_filter_punc(self, filter_punc):
+         self.filter_punc = filter_punc
+
+     def set_convert_num_mode(self, convert_num_mode):
+         self.convert_num_mode = convert_num_mode
+
+     def set_word_info(self, word_info):
+         self.word_info = word_info
+
+     def set_hotword_id(self, hotword_id):
+         self.hotword_id = hotword_id
+
+     def set_hotword_list(self, hotword_list):
+         self.hotword_list = hotword_list
+
+     def set_input_sample_rate(self, input_sample_rate):
+         self.input_sample_rate = input_sample_rate
+
+     def set_customization_id(self, customization_id):
+         self.customization_id = customization_id
+
+     def set_voice_format(self, voice_format):
+         self.voice_format = voice_format
+
+     def set_sentence_max_length(self, sentence_max_length):
+         self.sentence_max_length = sentence_max_length
+
+     def set_reinforce_hotword(self, reinforce_hotword):
+         self.reinforce_hotword = reinforce_hotword
+
+
+ class FlashRecognizer:
+     '''
+     response:
+     Field            Type
+     request_id       string
+     status           Integer
+     message          String
+     audio_duration   Integer
+     flash_result     Result Array
+
+     The Result struct has the fields:
+     text             String
+     channel_id       Integer
+     sentence_list    Sentence Array
+
+     The Sentence struct has the fields:
+     text             String
+     start_time       Integer
+     end_time         Integer
+     speaker_id       Integer
+     word_list        Word Array
+
+     The Word struct has the fields:
+     word             String
+     start_time       Integer
+     end_time         Integer
+     stable_flag      Integer
+     '''
+
+     def __init__(self, appid, credential):
+         self.credential = credential
+         self.appid = appid
+
+     def _format_sign_string(self, param):
+         signstr = "POSTasr.cloud.tencent.com/asr/flash/v1/"
+         for t in param:
+             if 'appid' in t:
+                 signstr += str(t[1])
+                 break
+         signstr += "?"
+         for x in param:
+             tmp = x
+             if 'appid' in x:
+                 continue
+             for t in tmp:
+                 signstr += str(t)
+                 signstr += "="
+             signstr = signstr[:-1]
+             signstr += "&"
+         signstr = signstr[:-1]
+         return signstr
+
+     def _build_header(self):
+         header = dict()
+         header["Host"] = "asr.cloud.tencent.com"
+         return header
+
+     def _sign(self, signstr, secret_key):
+         hmacstr = hmac.new(secret_key.encode('utf-8'),
+                            signstr.encode('utf-8'), hashlib.sha1).digest()
+         s = base64.b64encode(hmacstr)
+         s = s.decode('utf-8')
+         return s
+
+     def _build_req_with_signature(self, secret_key, params, header):
+         query = sorted(params.items(), key=lambda d: d[0])
+         signstr = self._format_sign_string(query)
+         signature = self._sign(signstr, secret_key)
+         header["Authorization"] = signature
+         requrl = "https://"
+         requrl += signstr[4::]
+         return requrl
+
+     def _create_query_arr(self, req):
+         query_arr = dict()
+         query_arr['appid'] = self.appid
+         query_arr['secretid'] = self.credential.secret_id
+         query_arr['timestamp'] = str(int(time.time()))
+         query_arr['engine_type'] = req.engine_type
+         query_arr['voice_format'] = req.voice_format
+         query_arr['speaker_diarization'] = req.speaker_diarization
+         if req.hotword_id != "":
+             query_arr['hotword_id'] = req.hotword_id
+         if req.hotword_list != "":
+             query_arr['hotword_list'] = req.hotword_list
+         if req.input_sample_rate != 0:
+             query_arr['input_sample_rate'] = req.input_sample_rate
+         query_arr['customization_id'] = req.customization_id
+         query_arr['filter_dirty'] = req.filter_dirty
+         query_arr['filter_modal'] = req.filter_modal
+         query_arr['filter_punc'] = req.filter_punc
+         query_arr['convert_num_mode'] = req.convert_num_mode
+         query_arr['word_info'] = req.word_info
+         query_arr['first_channel_only'] = req.first_channel_only
+         query_arr['reinforce_hotword'] = req.reinforce_hotword
+         query_arr['sentence_max_length'] = req.sentence_max_length
+         return query_arr
+
+     def recognize(self, req, data):
+         header = self._build_header()
+         query_arr = self._create_query_arr(req)
+         req_url = self._build_req_with_signature(self.credential.secret_key, query_arr, header)
+         r = requests.post(req_url, headers=header, data=data)
+         return r.text
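The vendored flash_recognizer.py is the Tencent Cloud one-shot ("flash") ASR client. A hedged usage sketch follows, assuming the vendored tencentcloud-speech-sdk-python directory is on sys.path (its modules import `from common import credential`) and that common/credential.py exposes a Credential(secret_id, secret_key) class as in the upstream SDK; the appid, keys, and audio file are placeholders.

```python
# Hypothetical usage sketch for the vendored FlashRecognizer (not part of the diff).
# Assumes the tencentcloud-speech-sdk-python directory has been added to sys.path and
# that common.credential.Credential(secret_id, secret_key) exists as in the upstream SDK.
# The appid, keys, and audio file are placeholders.
from asr.flash_recognizer import FlashRecognizer, FlashRecognitionRequest
from common import credential

cred = credential.Credential("YOUR_SECRET_ID", "YOUR_SECRET_KEY")
recognizer = FlashRecognizer("YOUR_APPID", cred)

req = FlashRecognitionRequest("16k_zh")  # engine_type
req.set_voice_format("wav")

with open("audio.wav", "rb") as f:
    audio_data = f.read()

# recognize() signs the query string with HMAC-SHA1, POSTs the raw audio bytes,
# and returns the JSON response body as text.
result_json = recognizer.recognize(req, audio_data)
print(result_json)
```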