smartpi 0.1.41__py3-none-any.whl → 0.1.42__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- smartpi/__init__.py +1 -1
- smartpi/ai_asr.py +18 -17
- smartpi/ai_vad.py +13 -129
- smartpi/onnx_pose_workflow.py +1 -1
- smartpi/rknn_pose_workflow.py +1 -1
- {smartpi-0.1.41.dist-info → smartpi-0.1.42.dist-info}/METADATA +1 -1
- {smartpi-0.1.41.dist-info → smartpi-0.1.42.dist-info}/RECORD +9 -9
- {smartpi-0.1.41.dist-info → smartpi-0.1.42.dist-info}/WHEEL +0 -0
- {smartpi-0.1.41.dist-info → smartpi-0.1.42.dist-info}/top_level.txt +0 -0
smartpi/__init__.py
CHANGED
|
@@ -4,5 +4,5 @@ from .base_driver import P1, P2, P3, P4, P5, P6, M1, M2, M3, M4, M5, M6
|
|
|
4
4
|
__all__ = ["base_driver","gui","ultrasonic","touch_sensor","temperature","humidity","light_sensor","color_sensor","motor","servo","led","flash",
|
|
5
5
|
"P1", "P2", "P3", "P4", "P5", "P6", "M1", "M2", "M3", "M4", "M5", "M6"]
|
|
6
6
|
|
|
7
|
-
__version__ = "0.1.41"
|
|
7
|
+
__version__ = "0.1.42"
|
|
8
8
|
|
smartpi/ai_asr.py
CHANGED
|
@@ -798,7 +798,7 @@ def init(appid: str, secret_id: str, secret_key: str, voice_format: int = 8, sam
|
|
|
798
798
|
hotword_id: str = "", result_type: int = 0, slice_size: int = 960,
|
|
799
799
|
vad_silence_timeout: int = 10000, vad_pause_timeout: int = 3000,
|
|
800
800
|
vad_mini_volume: int = 1000, vad_mini_length: int = 500, debug: bool = False,
|
|
801
|
-
|
|
801
|
+
vad_threshold: float = 0.7,
|
|
802
802
|
sentence_callback=None, engine_model_type: str = "16k_zh",
|
|
803
803
|
interruption_callback=None) -> bool:
|
|
804
804
|
"""初始化语音识别
|
|
@@ -817,7 +817,6 @@ def init(appid: str, secret_id: str, secret_key: str, voice_format: int = 8, sam
|
|
|
817
817
|
vad_mini_volume (int): 最小音量阈值,默认1000
|
|
818
818
|
vad_mini_length (int): 最小语音长度(毫秒),默认500
|
|
819
819
|
debug (bool): 是否开启调试模式,默认False
|
|
820
|
-
vad_type (str): VAD类型,默认"energy"(目前只支持energy silero不稳定慎用!)
|
|
821
820
|
vad_threshold (float): VAD阈值,默认0.7
|
|
822
821
|
sentence_callback (callable): 句子识别完成回调函数
|
|
823
822
|
engine_model_type (str): 引擎模型类型,默认"16k_zh"
|
|
@@ -839,28 +838,30 @@ def init(appid: str, secret_id: str, secret_key: str, voice_format: int = 8, sam
|
|
|
839
838
|
except ImportError:
|
|
840
839
|
print("警告: ai_vad模块未找到")
|
|
841
840
|
ai_vad = None
|
|
842
|
-
|
|
841
|
+
|
|
843
842
|
if ai_vad is not None:
|
|
844
843
|
try:
|
|
845
844
|
vad = ai_vad.VADProviderBase.create_vad_instance(
|
|
846
|
-
|
|
845
|
+
vad_mini_volume, vad_mini_length,
|
|
846
|
+
vad_pause_timeout, vad_silence_timeout,
|
|
847
|
+
vad_threshold
|
|
847
848
|
)
|
|
848
|
-
print(
|
|
849
|
+
print("VAD初始化成功,使用EnergyVADProvider")
|
|
849
850
|
except Exception as e:
|
|
850
851
|
print(f"VAD实例创建失败: {e}")
|
|
851
852
|
vad = None
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
853
|
+
|
|
854
|
+
_ai_asr_instance = TencentSpeechRecognizer(
|
|
855
|
+
appid=appid,
|
|
856
|
+
secret_id=secret_id,
|
|
857
|
+
secret_key=secret_key,
|
|
858
|
+
engine_model_type=engine_model_type,
|
|
859
|
+
sentence_callback=sentence_callback,
|
|
860
|
+
vad=vad,
|
|
861
|
+
interruption_callback=interruption_callback
|
|
862
|
+
)
|
|
863
|
+
print("语音识别初始化成功")
|
|
864
|
+
return True
|
|
864
865
|
except Exception as e:
|
|
865
866
|
print(f"语音识别初始化失败: {e}")
|
|
866
867
|
import traceback
|
smartpi/ai_vad.py
CHANGED
|
@@ -3,143 +3,27 @@ import sys
|
|
|
3
3
|
import os
|
|
4
4
|
import time
|
|
5
5
|
import numpy as np
|
|
6
|
-
import torch
|
|
7
|
-
|
|
8
|
-
# 设置环境变量以优化性能
|
|
9
|
-
os.environ['OMP_NUM_THREADS'] = '1'
|
|
10
|
-
os.environ['KMP_AFFINITY'] = 'disabled'
|
|
11
6
|
|
|
12
7
|
class VADProviderBase:
|
|
13
8
|
"""VAD provider base class"""
|
|
14
9
|
@staticmethod
|
|
15
10
|
def create_vad_instance(
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
11
|
+
vad_mini_volume=1000,
|
|
12
|
+
vad_mini_length=500,
|
|
13
|
+
vad_pause_timeout=3000,
|
|
14
|
+
vad_silence_timeout=10000,
|
|
15
|
+
threshold=0.02
|
|
21
16
|
):
|
|
22
|
-
"""Factory method to create VAD instance"""
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
)
|
|
30
|
-
except Exception as e:
|
|
31
|
-
print(f"Failed to create SileroVAD instance: {e}")
|
|
32
|
-
print("Falling back to EnergyVADProvider...")
|
|
33
|
-
return EnergyVADProvider(
|
|
34
|
-
threshold=energy_threshold,
|
|
35
|
-
min_silence_duration_ms=min_silence_duration_ms
|
|
36
|
-
)
|
|
37
|
-
elif class_name == "energy":
|
|
38
|
-
return EnergyVADProvider(
|
|
39
|
-
threshold=energy_threshold,
|
|
40
|
-
min_silence_duration_ms=min_silence_duration_ms
|
|
41
|
-
)
|
|
42
|
-
raise ValueError(f"Unsupported VAD type: {class_name}")
|
|
43
|
-
|
|
44
|
-
class SileroVADProvider(VADProviderBase):
|
|
45
|
-
"""Silero VAD provider"""
|
|
46
|
-
def __init__(self, model_dir=None, threshold=0.5, min_silence_duration_ms=800):
|
|
47
|
-
# 获取模型目录,默认为当前文件所在目录下的models/snakers4_silero-vad
|
|
48
|
-
if not model_dir:
|
|
49
|
-
model_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "models/snakers4_silero-vad")
|
|
50
|
-
|
|
51
|
-
print(f"Initializing SileroVAD: model_dir={model_dir}, threshold={threshold}, min_silence_duration_ms={min_silence_duration_ms}")
|
|
52
|
-
|
|
53
|
-
# 加载模型
|
|
54
|
-
try:
|
|
55
|
-
# 尝试检查torchaudio是否安装
|
|
56
|
-
import importlib.util
|
|
57
|
-
torchaudio_available = importlib.util.find_spec('torchaudio') is not None
|
|
58
|
-
|
|
59
|
-
if torchaudio_available:
|
|
60
|
-
# 如果有torchaudio,使用默认的PyTorch模型
|
|
61
|
-
self.model, self.utils = torch.hub.load(
|
|
62
|
-
repo_or_dir=model_dir,
|
|
63
|
-
source="local",
|
|
64
|
-
model="silero_vad",
|
|
65
|
-
force_reload=False,
|
|
66
|
-
)
|
|
67
|
-
else:
|
|
68
|
-
# 如果没有torchaudio,尝试使用ONNX版本
|
|
69
|
-
print("torchaudio not found, trying ONNX version...")
|
|
70
|
-
self.model, self.utils = torch.hub.load(
|
|
71
|
-
repo_or_dir=model_dir,
|
|
72
|
-
source="local",
|
|
73
|
-
model="silero_vad",
|
|
74
|
-
onnx=True,
|
|
75
|
-
force_reload=False,
|
|
76
|
-
)
|
|
77
|
-
except Exception as e:
|
|
78
|
-
print(f"Failed to load Silero model (both PyTorch and ONNX versions): {e}")
|
|
79
|
-
# 降级到能量检测
|
|
80
|
-
raise Exception("Model loading failed, will use energy detection")
|
|
81
|
-
|
|
82
|
-
# 初始化VAD配置,使用默认值作为备选
|
|
83
|
-
self.vad_threshold = float(threshold) if threshold else 0.5
|
|
84
|
-
self.silence_threshold_ms = int(min_silence_duration_ms) if min_silence_duration_ms else 800
|
|
85
|
-
print("SileroVAD初始化成功")
|
|
86
|
-
|
|
87
|
-
def is_vad(self, conn, audio_data):
|
|
88
|
-
"""检测音频数据中的语音活动"""
|
|
89
|
-
try:
|
|
90
|
-
# 确保conn对象有必要的属性
|
|
91
|
-
if not hasattr(conn, 'client_audio_buffer'):
|
|
92
|
-
conn.client_audio_buffer = []
|
|
93
|
-
if not hasattr(conn, 'client_have_voice'):
|
|
94
|
-
conn.client_have_voice = False
|
|
95
|
-
if not hasattr(conn, 'client_have_voice_last_time'):
|
|
96
|
-
conn.client_have_voice_last_time = 0
|
|
97
|
-
if not hasattr(conn, 'client_voice_stop'):
|
|
98
|
-
conn.client_voice_stop = False
|
|
99
|
-
|
|
100
|
-
# 将新数据加入缓冲区
|
|
101
|
-
conn.client_audio_buffer.extend(audio_data)
|
|
102
|
-
|
|
103
|
-
# 处理缓冲区中的完整帧(每次处理512采样点,每个采样点2字节)
|
|
104
|
-
client_have_voice = False
|
|
105
|
-
while len(conn.client_audio_buffer) >= 512 * 2:
|
|
106
|
-
# 提取前512个采样点(1024字节)
|
|
107
|
-
chunk = bytes(conn.client_audio_buffer[:512 * 2])
|
|
108
|
-
conn.client_audio_buffer = conn.client_audio_buffer[512 * 2:]
|
|
109
|
-
|
|
110
|
-
# 转换为模型需要的格式
|
|
111
|
-
audio_int16 = np.frombuffer(chunk, dtype=np.int16)
|
|
112
|
-
audio_float32 = audio_int16.astype(np.float32) / 32768.0
|
|
113
|
-
|
|
114
|
-
# 检测语音活动
|
|
115
|
-
with torch.no_grad():
|
|
116
|
-
speech_prob = self.model(torch.from_numpy(audio_float32), 16000).item()
|
|
117
|
-
|
|
118
|
-
client_have_voice = speech_prob >= self.vad_threshold
|
|
119
|
-
|
|
120
|
-
# 更新VAD状态
|
|
121
|
-
if client_have_voice:
|
|
122
|
-
conn.client_have_voice = True
|
|
123
|
-
conn.client_have_voice_last_time = time.time() * 1000
|
|
124
|
-
conn.client_voice_stop = False
|
|
125
|
-
elif conn.client_have_voice:
|
|
126
|
-
# 检查静默时间
|
|
127
|
-
stop_duration = time.time() * 1000 - conn.client_have_voice_last_time
|
|
128
|
-
if stop_duration >= self.silence_threshold_ms:
|
|
129
|
-
conn.client_have_voice = False
|
|
130
|
-
conn.client_voice_stop = True
|
|
131
|
-
|
|
132
|
-
return client_have_voice
|
|
133
|
-
except Exception as e:
|
|
134
|
-
print(f"VAD detection error: {e}")
|
|
135
|
-
return False
|
|
136
|
-
|
|
137
|
-
def reset_states(self):
|
|
138
|
-
"""Reset VAD states"""
|
|
139
|
-
pass # States are now stored in conn object, no need to reset here
|
|
17
|
+
"""Factory method to create VAD instance - always returns EnergyVADProvider"""
|
|
18
|
+
# 忽略vad_mini_volume, vad_mini_length, vad_silence_timeout参数
|
|
19
|
+
# 将vad_pause_timeout作为min_silence_duration_ms传递
|
|
20
|
+
return EnergyVADProvider(
|
|
21
|
+
threshold=threshold,
|
|
22
|
+
min_silence_duration_ms=vad_pause_timeout
|
|
23
|
+
)
|
|
140
24
|
|
|
141
25
|
class EnergyVADProvider(VADProviderBase):
|
|
142
|
-
"""Audio energy-based VAD provider
|
|
26
|
+
"""Audio energy-based VAD provider"""
|
|
143
27
|
def __init__(self, threshold=0.02, min_silence_duration_ms=300):
|
|
144
28
|
print(f"Initializing EnergyVAD: threshold={threshold}, min_silence_duration_ms={min_silence_duration_ms}")
|
|
145
29
|
# 初始化VAD配置,使用默认值作为备选
|
smartpi/onnx_pose_workflow.py
CHANGED
smartpi/rknn_pose_workflow.py
CHANGED
{smartpi-0.1.41.dist-info → smartpi-0.1.42.dist-info}/RECORD
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
smartpi/__init__.py,sha256=
|
|
1
|
+
smartpi/__init__.py,sha256=7AdxZqsEjhv8Ht_bLXOoF3T3jGZ0TEnG-SVYdjs4NR0,356
|
|
2
2
|
smartpi/_gui.py,sha256=ij-6HZAEIwdy_hvU7f0NkyQjx_-eephijlKbGUhf8Uo,2177
|
|
3
|
-
smartpi/ai_asr.py,sha256=
|
|
3
|
+
smartpi/ai_asr.py,sha256=wxh_1Klh8vJottAt19jq3qpOmM_Cw4DHQEPKpsHGhmE,40063
|
|
4
4
|
smartpi/ai_llm.py,sha256=-khBK2PMwbYUDvbaCTRCktE4dFloqfai4mHI-V8GEXM,36751
|
|
5
5
|
smartpi/ai_tts.py,sha256=OjAJs3XOykiufXuEDiaD0coKsWjknPdhwRpRzGLDZKU,36061
|
|
6
|
-
smartpi/ai_vad.py,sha256=
|
|
6
|
+
smartpi/ai_vad.py,sha256=9J1xxuKF8GvojkL_JuXv7_xE8bQAo5D49Xasi8piKK8,3630
|
|
7
7
|
smartpi/base_driver.py,sha256=nGkGp7cAgeNo7BijHMJKHfdVZKIKuZNTx9GqLFPP5TI,31486
|
|
8
8
|
smartpi/camera.py,sha256=AVpZsMpW-24CP3BOfarjmRawMJdTOZY7Crq7FeLOqb4,3292
|
|
9
9
|
smartpi/color_sensor.py,sha256=ckIXD81YnqPo6nENAnipNp3gY12FJ235QKj0e8Cul9E,521
|
|
@@ -20,13 +20,13 @@ smartpi/motor.py,sha256=DvFzREEzyRafGmSCagU6ASeoE8fnAsKYI4oYMrkXXJc,5351
|
|
|
20
20
|
smartpi/move.py,sha256=s1ZnkFtp6SCBnxhBxp6qQjGbifdsY5hjVCwlarTsZto,6688
|
|
21
21
|
smartpi/onnx_hand_workflow.py,sha256=ZCoaWC6GygZSrhM6jhsuB6qmQ6GiAFFrso6rKAGmue8,8157
|
|
22
22
|
smartpi/onnx_image_workflow.py,sha256=-saM_NxR6yDz06xlWZOvHf6cq3zmtOCFhCyZTGqvuOk,6188
|
|
23
|
-
smartpi/onnx_pose_workflow.py,sha256=
|
|
23
|
+
smartpi/onnx_pose_workflow.py,sha256=w0zGlXXBgIcxwjLF7eYHPEePqRw3KXvKQUqzVd_UrzQ,20966
|
|
24
24
|
smartpi/onnx_text_workflow.py,sha256=6l9MTT2T1-rNye3_dSHLI2U749Z94aoRdkSe6CNXfHw,7191
|
|
25
25
|
smartpi/onnx_voice_workflow.py,sha256=jkMFzy3RUnLo8LZAuCUfsS3YCJWSZzZuiE4RFoQ2HZw,17440
|
|
26
26
|
smartpi/posenet_utils.py,sha256=o3scK41Eqvftav4y4vp6_6HinQWNCLeLpArXAzqQ-7s,8983
|
|
27
27
|
smartpi/rknn_hand_workflow.py,sha256=wsVN_PYP9M-1AFaN4yqrGksUBoamYfujW0nQq4nv3kU,10160
|
|
28
28
|
smartpi/rknn_image_workflow.py,sha256=4lTtcdmQ9KN5WiEnHayvqAd-dA0tiap5YXIqAMn5SoI,18444
|
|
29
|
-
smartpi/rknn_pose_workflow.py,sha256=
|
|
29
|
+
smartpi/rknn_pose_workflow.py,sha256=T09F9vcQ6-QG3PlFRpCI4AuTPzEl_s0C38wfMD21Ujo,27836
|
|
30
30
|
smartpi/rknn_text_workflow.py,sha256=KNBSetj3tmlLxdZOm0yzbiDnjH8S5191fuxh5Mi-uCY,9632
|
|
31
31
|
smartpi/rknn_voice_workflow.py,sha256=T8iRQWPtJYXqoHIZH2FiT1WLxwN3HQg4D-mg-5KvYdA,16326
|
|
32
32
|
smartpi/servo.py,sha256=0p09Jk-IVk5nLXz2AqFvytiYSSe4sMxdy1FaNMQijoY,5770
|
|
@@ -70,7 +70,7 @@ smartpi/text_gte_model/config/tokenizer_config.json,sha256=w5RiDifbeIYy6vyGX5v94
|
|
|
70
70
|
smartpi/text_gte_model/config/vocab.txt,sha256=oi9hP3uz_8h8XoHNh6rgLnVdJbIEm75zKoSKM8HzsC8,84758
|
|
71
71
|
smartpi/text_gte_model/gte/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
72
72
|
smartpi/text_gte_model/gte/gte_model.onnx,sha256=XXYg6TUhzOx1SqAhp6ePDU0QgeK6DQEqHATMuQQJCNE,30468366
|
|
73
|
-
smartpi-0.1.
|
|
74
|
-
smartpi-0.1.
|
|
75
|
-
smartpi-0.1.
|
|
76
|
-
smartpi-0.1.41.dist-info/RECORD,,
|
|
73
|
+
smartpi-0.1.42.dist-info/METADATA,sha256=FxIXEB2WMTsKc8Fs8ukVfdnD3--vqy7fOEXFT5AQLq8,614
|
|
74
|
+
smartpi-0.1.42.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
|
75
|
+
smartpi-0.1.42.dist-info/top_level.txt,sha256=PoLhUCmWAiQUg5UeN2fS-Y1iQyBbF2rdUlizXtpHGRQ,8
|
|
76
|
+
smartpi-0.1.42.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|