smartpi 0.1.41__py3-none-any.whl → 0.1.42__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
smartpi/__init__.py CHANGED
@@ -4,5 +4,5 @@ from .base_driver import P1, P2, P3, P4, P5, P6, M1, M2, M3, M4, M5, M6
4
4
  __all__ = ["base_driver","gui","ultrasonic","touch_sensor","temperature","humidity","light_sensor","color_sensor","motor","servo","led","flash",
5
5
  "P1", "P2", "P3", "P4", "P5", "P6", "M1", "M2", "M3", "M4", "M5", "M6"]
6
6
 
7
- __version__ = "0.1.41"
7
+ __version__ = "0.1.42"
8
8
 
smartpi/ai_asr.py CHANGED
@@ -798,7 +798,7 @@ def init(appid: str, secret_id: str, secret_key: str, voice_format: int = 8, sam
798
798
  hotword_id: str = "", result_type: int = 0, slice_size: int = 960,
799
799
  vad_silence_timeout: int = 10000, vad_pause_timeout: int = 3000,
800
800
  vad_mini_volume: int = 1000, vad_mini_length: int = 500, debug: bool = False,
801
- vad_type: str = "energy", vad_threshold: float = 0.7,
801
+ vad_threshold: float = 0.7,
802
802
  sentence_callback=None, engine_model_type: str = "16k_zh",
803
803
  interruption_callback=None) -> bool:
804
804
  """初始化语音识别
@@ -817,7 +817,6 @@ def init(appid: str, secret_id: str, secret_key: str, voice_format: int = 8, sam
817
817
  vad_mini_volume (int): 最小音量阈值,默认1000
818
818
  vad_mini_length (int): 最小语音长度(毫秒),默认500
819
819
  debug (bool): 是否开启调试模式,默认False
820
- vad_type (str): VAD类型,默认"energy"(目前只支持energy silero不稳定慎用!)
821
820
  vad_threshold (float): VAD阈值,默认0.7
822
821
  sentence_callback (callable): 句子识别完成回调函数
823
822
  engine_model_type (str): 引擎模型类型,默认"16k_zh"
@@ -839,28 +838,30 @@ def init(appid: str, secret_id: str, secret_key: str, voice_format: int = 8, sam
839
838
  except ImportError:
840
839
  print("警告: ai_vad模块未找到")
841
840
  ai_vad = None
842
-
841
+
843
842
  if ai_vad is not None:
844
843
  try:
845
844
  vad = ai_vad.VADProviderBase.create_vad_instance(
846
- vad_type, threshold=vad_threshold
845
+ vad_mini_volume, vad_mini_length,
846
+ vad_pause_timeout, vad_silence_timeout,
847
+ vad_threshold
847
848
  )
848
- print(f"VAD实例创建成功,类型: {vad_type},阈值: {vad_threshold}")
849
+ print("VAD初始化成功,使用EnergyVADProvider")
849
850
  except Exception as e:
850
851
  print(f"VAD实例创建失败: {e}")
851
852
  vad = None
852
-
853
- _ai_asr_instance = TencentSpeechRecognizer(
854
- appid=appid,
855
- secret_id=secret_id,
856
- secret_key=secret_key,
857
- engine_model_type=engine_model_type,
858
- sentence_callback=sentence_callback,
859
- vad=vad,
860
- interruption_callback=interruption_callback
861
- )
862
- print("语音识别初始化成功")
863
- return True
853
+
854
+ _ai_asr_instance = TencentSpeechRecognizer(
855
+ appid=appid,
856
+ secret_id=secret_id,
857
+ secret_key=secret_key,
858
+ engine_model_type=engine_model_type,
859
+ sentence_callback=sentence_callback,
860
+ vad=vad,
861
+ interruption_callback=interruption_callback
862
+ )
863
+ print("语音识别初始化成功")
864
+ return True
864
865
  except Exception as e:
865
866
  print(f"语音识别初始化失败: {e}")
866
867
  import traceback
smartpi/ai_vad.py CHANGED
@@ -3,143 +3,27 @@ import sys
3
3
  import os
4
4
  import time
5
5
  import numpy as np
6
- import torch
7
-
8
- # 设置环境变量以优化性能
9
- os.environ['OMP_NUM_THREADS'] = '1'
10
- os.environ['KMP_AFFINITY'] = 'disabled'
11
6
 
12
7
  class VADProviderBase:
13
8
  """VAD provider base class"""
14
9
  @staticmethod
15
10
  def create_vad_instance(
16
- class_name: str = "silero",
17
- model_dir=None,
18
- threshold=0.5,
19
- min_silence_duration_ms=800,
20
- energy_threshold=0.02
11
+ vad_mini_volume=1000,
12
+ vad_mini_length=500,
13
+ vad_pause_timeout=3000,
14
+ vad_silence_timeout=10000,
15
+ threshold=0.02
21
16
  ):
22
- """Factory method to create VAD instance"""
23
- if class_name == "silero":
24
- try:
25
- return SileroVADProvider(
26
- model_dir=model_dir,
27
- threshold=threshold,
28
- min_silence_duration_ms=min_silence_duration_ms
29
- )
30
- except Exception as e:
31
- print(f"Failed to create SileroVAD instance: {e}")
32
- print("Falling back to EnergyVADProvider...")
33
- return EnergyVADProvider(
34
- threshold=energy_threshold,
35
- min_silence_duration_ms=min_silence_duration_ms
36
- )
37
- elif class_name == "energy":
38
- return EnergyVADProvider(
39
- threshold=energy_threshold,
40
- min_silence_duration_ms=min_silence_duration_ms
41
- )
42
- raise ValueError(f"Unsupported VAD type: {class_name}")
43
-
44
- class SileroVADProvider(VADProviderBase):
45
- """Silero VAD provider"""
46
- def __init__(self, model_dir=None, threshold=0.5, min_silence_duration_ms=800):
47
- # 获取模型目录,默认为当前文件所在目录下的models/snakers4_silero-vad
48
- if not model_dir:
49
- model_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "models/snakers4_silero-vad")
50
-
51
- print(f"Initializing SileroVAD: model_dir={model_dir}, threshold={threshold}, min_silence_duration_ms={min_silence_duration_ms}")
52
-
53
- # 加载模型
54
- try:
55
- # 尝试检查torchaudio是否安装
56
- import importlib.util
57
- torchaudio_available = importlib.util.find_spec('torchaudio') is not None
58
-
59
- if torchaudio_available:
60
- # 如果有torchaudio,使用默认的PyTorch模型
61
- self.model, self.utils = torch.hub.load(
62
- repo_or_dir=model_dir,
63
- source="local",
64
- model="silero_vad",
65
- force_reload=False,
66
- )
67
- else:
68
- # 如果没有torchaudio,尝试使用ONNX版本
69
- print("torchaudio not found, trying ONNX version...")
70
- self.model, self.utils = torch.hub.load(
71
- repo_or_dir=model_dir,
72
- source="local",
73
- model="silero_vad",
74
- onnx=True,
75
- force_reload=False,
76
- )
77
- except Exception as e:
78
- print(f"Failed to load Silero model (both PyTorch and ONNX versions): {e}")
79
- # 降级到能量检测
80
- raise Exception("Model loading failed, will use energy detection")
81
-
82
- # 初始化VAD配置,使用默认值作为备选
83
- self.vad_threshold = float(threshold) if threshold else 0.5
84
- self.silence_threshold_ms = int(min_silence_duration_ms) if min_silence_duration_ms else 800
85
- print("SileroVAD初始化成功")
86
-
87
- def is_vad(self, conn, audio_data):
88
- """检测音频数据中的语音活动"""
89
- try:
90
- # 确保conn对象有必要的属性
91
- if not hasattr(conn, 'client_audio_buffer'):
92
- conn.client_audio_buffer = []
93
- if not hasattr(conn, 'client_have_voice'):
94
- conn.client_have_voice = False
95
- if not hasattr(conn, 'client_have_voice_last_time'):
96
- conn.client_have_voice_last_time = 0
97
- if not hasattr(conn, 'client_voice_stop'):
98
- conn.client_voice_stop = False
99
-
100
- # 将新数据加入缓冲区
101
- conn.client_audio_buffer.extend(audio_data)
102
-
103
- # 处理缓冲区中的完整帧(每次处理512采样点,每个采样点2字节)
104
- client_have_voice = False
105
- while len(conn.client_audio_buffer) >= 512 * 2:
106
- # 提取前512个采样点(1024字节)
107
- chunk = bytes(conn.client_audio_buffer[:512 * 2])
108
- conn.client_audio_buffer = conn.client_audio_buffer[512 * 2:]
109
-
110
- # 转换为模型需要的格式
111
- audio_int16 = np.frombuffer(chunk, dtype=np.int16)
112
- audio_float32 = audio_int16.astype(np.float32) / 32768.0
113
-
114
- # 检测语音活动
115
- with torch.no_grad():
116
- speech_prob = self.model(torch.from_numpy(audio_float32), 16000).item()
117
-
118
- client_have_voice = speech_prob >= self.vad_threshold
119
-
120
- # 更新VAD状态
121
- if client_have_voice:
122
- conn.client_have_voice = True
123
- conn.client_have_voice_last_time = time.time() * 1000
124
- conn.client_voice_stop = False
125
- elif conn.client_have_voice:
126
- # 检查静默时间
127
- stop_duration = time.time() * 1000 - conn.client_have_voice_last_time
128
- if stop_duration >= self.silence_threshold_ms:
129
- conn.client_have_voice = False
130
- conn.client_voice_stop = True
131
-
132
- return client_have_voice
133
- except Exception as e:
134
- print(f"VAD detection error: {e}")
135
- return False
136
-
137
- def reset_states(self):
138
- """Reset VAD states"""
139
- pass # States are now stored in conn object, no need to reset here
17
+ """Factory method to create VAD instance - always returns EnergyVADProvider"""
18
+ # 忽略vad_mini_volume, vad_mini_length, vad_silence_timeout参数
19
+ # 将vad_pause_timeout作为min_silence_duration_ms传递
20
+ return EnergyVADProvider(
21
+ threshold=threshold,
22
+ min_silence_duration_ms=vad_pause_timeout
23
+ )
140
24
 
141
25
  class EnergyVADProvider(VADProviderBase):
142
- """Audio energy-based VAD provider (fallback solution)"""
26
+ """Audio energy-based VAD provider"""
143
27
  def __init__(self, threshold=0.02, min_silence_duration_ms=300):
144
28
  print(f"Initializing EnergyVAD: threshold={threshold}, min_silence_duration_ms={min_silence_duration_ms}")
145
29
  # 初始化VAD配置,使用默认值作为备选
@@ -5,7 +5,7 @@ import onnx
5
5
  import json
6
6
  import cv2
7
7
  import time # 用于实时耗时计算
8
- from lib.posenet_utils import get_posenet_output
8
+ from posenet_utils import get_posenet_output
9
9
 
10
10
 
11
11
  class PoseWorkflow:
@@ -5,7 +5,7 @@ import json
5
5
  import os
6
6
  import time
7
7
  from rknnlite.api import RKNNLite # RKNN核心库
8
- from lib.posenet_utils import get_posenet_output # 姿态关键点提取逻辑(需支持返回3个值)
8
+ from posenet_utils import get_posenet_output # 姿态关键点提取逻辑(需支持返回3个值)
9
9
 
10
10
 
11
11
  class PoseWorkflow:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: smartpi
3
- Version: 0.1.41
3
+ Version: 0.1.42
4
4
  Summary: A library use for H2-RCU
5
5
  Author: ZMROBO
6
6
  Classifier: Programming Language :: Python :: 3
@@ -1,9 +1,9 @@
1
- smartpi/__init__.py,sha256=ioz08SpgjM2Df14M57OTfaAqxYbI-A6h2BeKEQ--8GI,356
1
+ smartpi/__init__.py,sha256=7AdxZqsEjhv8Ht_bLXOoF3T3jGZ0TEnG-SVYdjs4NR0,356
2
2
  smartpi/_gui.py,sha256=ij-6HZAEIwdy_hvU7f0NkyQjx_-eephijlKbGUhf8Uo,2177
3
- smartpi/ai_asr.py,sha256=eoF5z4oRJWpSk13tqO2M2N7ZBCkNWHPhQDt4-uUo_ko,40079
3
+ smartpi/ai_asr.py,sha256=wxh_1Klh8vJottAt19jq3qpOmM_Cw4DHQEPKpsHGhmE,40063
4
4
  smartpi/ai_llm.py,sha256=-khBK2PMwbYUDvbaCTRCktE4dFloqfai4mHI-V8GEXM,36751
5
5
  smartpi/ai_tts.py,sha256=OjAJs3XOykiufXuEDiaD0coKsWjknPdhwRpRzGLDZKU,36061
6
- smartpi/ai_vad.py,sha256=RJBieDmH5A0FITfULhpcn6Vpy_NhVXK5s416D-Lm2nU,8983
6
+ smartpi/ai_vad.py,sha256=9J1xxuKF8GvojkL_JuXv7_xE8bQAo5D49Xasi8piKK8,3630
7
7
  smartpi/base_driver.py,sha256=nGkGp7cAgeNo7BijHMJKHfdVZKIKuZNTx9GqLFPP5TI,31486
8
8
  smartpi/camera.py,sha256=AVpZsMpW-24CP3BOfarjmRawMJdTOZY7Crq7FeLOqb4,3292
9
9
  smartpi/color_sensor.py,sha256=ckIXD81YnqPo6nENAnipNp3gY12FJ235QKj0e8Cul9E,521
@@ -20,13 +20,13 @@ smartpi/motor.py,sha256=DvFzREEzyRafGmSCagU6ASeoE8fnAsKYI4oYMrkXXJc,5351
20
20
  smartpi/move.py,sha256=s1ZnkFtp6SCBnxhBxp6qQjGbifdsY5hjVCwlarTsZto,6688
21
21
  smartpi/onnx_hand_workflow.py,sha256=ZCoaWC6GygZSrhM6jhsuB6qmQ6GiAFFrso6rKAGmue8,8157
22
22
  smartpi/onnx_image_workflow.py,sha256=-saM_NxR6yDz06xlWZOvHf6cq3zmtOCFhCyZTGqvuOk,6188
23
- smartpi/onnx_pose_workflow.py,sha256=7hoZ31XfZRAbgmdQbgfK-xePniMa5mDEggV12F-Uq5c,20970
23
+ smartpi/onnx_pose_workflow.py,sha256=w0zGlXXBgIcxwjLF7eYHPEePqRw3KXvKQUqzVd_UrzQ,20966
24
24
  smartpi/onnx_text_workflow.py,sha256=6l9MTT2T1-rNye3_dSHLI2U749Z94aoRdkSe6CNXfHw,7191
25
25
  smartpi/onnx_voice_workflow.py,sha256=jkMFzy3RUnLo8LZAuCUfsS3YCJWSZzZuiE4RFoQ2HZw,17440
26
26
  smartpi/posenet_utils.py,sha256=o3scK41Eqvftav4y4vp6_6HinQWNCLeLpArXAzqQ-7s,8983
27
27
  smartpi/rknn_hand_workflow.py,sha256=wsVN_PYP9M-1AFaN4yqrGksUBoamYfujW0nQq4nv3kU,10160
28
28
  smartpi/rknn_image_workflow.py,sha256=4lTtcdmQ9KN5WiEnHayvqAd-dA0tiap5YXIqAMn5SoI,18444
29
- smartpi/rknn_pose_workflow.py,sha256=LA6tXOI81R1IQhQvgBWLGV_I8Qa-ROUgXqj3kTEMfmc,27840
29
+ smartpi/rknn_pose_workflow.py,sha256=T09F9vcQ6-QG3PlFRpCI4AuTPzEl_s0C38wfMD21Ujo,27836
30
30
  smartpi/rknn_text_workflow.py,sha256=KNBSetj3tmlLxdZOm0yzbiDnjH8S5191fuxh5Mi-uCY,9632
31
31
  smartpi/rknn_voice_workflow.py,sha256=T8iRQWPtJYXqoHIZH2FiT1WLxwN3HQg4D-mg-5KvYdA,16326
32
32
  smartpi/servo.py,sha256=0p09Jk-IVk5nLXz2AqFvytiYSSe4sMxdy1FaNMQijoY,5770
@@ -70,7 +70,7 @@ smartpi/text_gte_model/config/tokenizer_config.json,sha256=w5RiDifbeIYy6vyGX5v94
70
70
  smartpi/text_gte_model/config/vocab.txt,sha256=oi9hP3uz_8h8XoHNh6rgLnVdJbIEm75zKoSKM8HzsC8,84758
71
71
  smartpi/text_gte_model/gte/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
72
72
  smartpi/text_gte_model/gte/gte_model.onnx,sha256=XXYg6TUhzOx1SqAhp6ePDU0QgeK6DQEqHATMuQQJCNE,30468366
73
- smartpi-0.1.41.dist-info/METADATA,sha256=446DzYjpDU_MxEdwmQyiyx7pf6K-OAQIOW3sek2gnZU,614
74
- smartpi-0.1.41.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
75
- smartpi-0.1.41.dist-info/top_level.txt,sha256=PoLhUCmWAiQUg5UeN2fS-Y1iQyBbF2rdUlizXtpHGRQ,8
76
- smartpi-0.1.41.dist-info/RECORD,,
73
+ smartpi-0.1.42.dist-info/METADATA,sha256=FxIXEB2WMTsKc8Fs8ukVfdnD3--vqy7fOEXFT5AQLq8,614
74
+ smartpi-0.1.42.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
75
+ smartpi-0.1.42.dist-info/top_level.txt,sha256=PoLhUCmWAiQUg5UeN2fS-Y1iQyBbF2rdUlizXtpHGRQ,8
76
+ smartpi-0.1.42.dist-info/RECORD,,