PyPI - smartpi - Versions diffs - 0.1.41__py3-none-any.whl → 0.1.42__py3-none-any.whl - Mend

smartpi 0.1.41__py3-none-any.whl → 0.1.42__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

smartpi/__init__.py +1 -1
smartpi/ai_asr.py +18 -17
smartpi/ai_vad.py +13 -129
smartpi/onnx_pose_workflow.py +1 -1
smartpi/rknn_pose_workflow.py +1 -1
{smartpi-0.1.41.dist-info → smartpi-0.1.42.dist-info}/METADATA +1 -1
{smartpi-0.1.41.dist-info → smartpi-0.1.42.dist-info}/RECORD +9 -9
{smartpi-0.1.41.dist-info → smartpi-0.1.42.dist-info}/WHEEL +0 -0
{smartpi-0.1.41.dist-info → smartpi-0.1.42.dist-info}/top_level.txt +0 -0

smartpi/__init__.py CHANGED Viewed

@@ -4,5 +4,5 @@ from .base_driver import P1, P2, P3, P4, P5, P6, M1, M2, M3, M4, M5, M6
 __all__ = ["base_driver","gui","ultrasonic","touch_sensor","temperature","humidity","light_sensor","color_sensor","motor","servo","led","flash",
            "P1", "P2", "P3", "P4", "P5", "P6", "M1", "M2", "M3", "M4", "M5", "M6"]
-__version__ = "0.1.41"
+__version__ = "0.1.42"

smartpi/ai_asr.py CHANGED Viewed

@@ -798,7 +798,7 @@ def init(appid: str, secret_id: str, secret_key: str, voice_format: int = 8, sam
          hotword_id: str = "", result_type: int = 0, slice_size: int = 960,
          vad_silence_timeout: int = 10000, vad_pause_timeout: int = 3000,
          vad_mini_volume: int = 1000, vad_mini_length: int = 500, debug: bool = False,
-         vad_type: str = "energy", vad_threshold: float = 0.7,
+         vad_threshold: float = 0.7,
          sentence_callback=None, engine_model_type: str = "16k_zh",
          interruption_callback=None) -> bool:
     """初始化语音识别
@@ -817,7 +817,6 @@ def init(appid: str, secret_id: str, secret_key: str, voice_format: int = 8, sam
         vad_mini_volume (int): 最小音量阈值，默认1000
         vad_mini_length (int): 最小语音长度(毫秒)，默认500
         debug (bool): 是否开启调试模式，默认False
-        vad_type (str): VAD类型，默认"energy"（目前只支持energy silero不稳定慎用！）
         vad_threshold (float): VAD阈值，默认0.7
         sentence_callback (callable): 句子识别完成回调函数
         engine_model_type (str): 引擎模型类型，默认"16k_zh"
@@ -839,28 +838,30 @@ def init(appid: str, secret_id: str, secret_key: str, voice_format: int = 8, sam
             except ImportError:
                 print("警告: ai_vad模块未找到")
                 ai_vad = None
         if ai_vad is not None:
             try:
                 vad = ai_vad.VADProviderBase.create_vad_instance(
-                    vad_type, threshold=vad_threshold
+                    vad_mini_volume, vad_mini_length,
+                    vad_pause_timeout, vad_silence_timeout,
+                    vad_threshold
                 )
-                print(f"VAD实例创建成功，类型: {vad_type}，阈值: {vad_threshold}")
+                print("VAD初始化成功，使用EnergyVADProvider")
             except Exception as e:
                 print(f"VAD实例创建失败: {e}")
                 vad = None
-        _ai_asr_instance = TencentSpeechRecognizer(
-            appid=appid,
-            secret_id=secret_id,
-            secret_key=secret_key,
-            engine_model_type=engine_model_type,
-            sentence_callback=sentence_callback,
-            vad=vad,
-            interruption_callback=interruption_callback
-        )
-        print("语音识别初始化成功")
-        return True
+            _ai_asr_instance = TencentSpeechRecognizer(
+                appid=appid,
+                secret_id=secret_id,
+                secret_key=secret_key,
+                engine_model_type=engine_model_type,
+                sentence_callback=sentence_callback,
+                vad=vad,
+                interruption_callback=interruption_callback
+            )
+            print("语音识别初始化成功")
+            return True
     except Exception as e:
         print(f"语音识别初始化失败: {e}")
         import traceback

smartpi/ai_vad.py CHANGED Viewed

@@ -3,143 +3,27 @@ import sys
 import os
 import time
 import numpy as np
-import torch
-# 设置环境变量以优化性能
-os.environ['OMP_NUM_THREADS'] = '1'
-os.environ['KMP_AFFINITY'] = 'disabled'
 class VADProviderBase:
     """VAD provider base class"""
     @staticmethod
     def create_vad_instance(
-        class_name: str = "silero",
-        model_dir=None,
-        threshold=0.5,
-        min_silence_duration_ms=800,
-        energy_threshold=0.02
+        vad_mini_volume=1000,
+        vad_mini_length=500,
+        vad_pause_timeout=3000,
+        vad_silence_timeout=10000,
+        threshold=0.02
     ):
-        """Factory method to create VAD instance"""
-        if class_name == "silero":
-            try:
-                return SileroVADProvider(
-                    model_dir=model_dir,
-                    threshold=threshold,
-                    min_silence_duration_ms=min_silence_duration_ms
-                )
-            except Exception as e:
-                print(f"Failed to create SileroVAD instance: {e}")
-                print("Falling back to EnergyVADProvider...")
-                return EnergyVADProvider(
-                    threshold=energy_threshold,
-                    min_silence_duration_ms=min_silence_duration_ms
-                )
-        elif class_name == "energy":
-            return EnergyVADProvider(
-                threshold=energy_threshold,
-                min_silence_duration_ms=min_silence_duration_ms
-            )
-        raise ValueError(f"Unsupported VAD type: {class_name}")
-class SileroVADProvider(VADProviderBase):
-    """Silero VAD provider"""
-    def __init__(self, model_dir=None, threshold=0.5, min_silence_duration_ms=800):
-        # 获取模型目录，默认为当前文件所在目录下的models/snakers4_silero-vad
-        if not model_dir:
-            model_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "models/snakers4_silero-vad")
-        print(f"Initializing SileroVAD: model_dir={model_dir}, threshold={threshold}, min_silence_duration_ms={min_silence_duration_ms}")
-        # 加载模型
-        try:
-            # 尝试检查torchaudio是否安装
-            import importlib.util
-            torchaudio_available = importlib.util.find_spec('torchaudio') is not None
-            if torchaudio_available:
-                # 如果有torchaudio，使用默认的PyTorch模型
-                self.model, self.utils = torch.hub.load(
-                    repo_or_dir=model_dir,
-                    source="local",
-                    model="silero_vad",
-                    force_reload=False,
-                )
-            else:
-                # 如果没有torchaudio，尝试使用ONNX版本
-                print("torchaudio not found, trying ONNX version...")
-                self.model, self.utils = torch.hub.load(
-                    repo_or_dir=model_dir,
-                    source="local",
-                    model="silero_vad",
-                    onnx=True,
-                    force_reload=False,
-                )
-        except Exception as e:
-            print(f"Failed to load Silero model (both PyTorch and ONNX versions): {e}")
-            # 降级到能量检测
-            raise Exception("Model loading failed, will use energy detection")
-        # 初始化VAD配置，使用默认值作为备选
-        self.vad_threshold = float(threshold) if threshold else 0.5
-        self.silence_threshold_ms = int(min_silence_duration_ms) if min_silence_duration_ms else 800
-        print("SileroVAD初始化成功")
-    def is_vad(self, conn, audio_data):
-        """检测音频数据中的语音活动"""
-        try:
-            # 确保conn对象有必要的属性
-            if not hasattr(conn, 'client_audio_buffer'):
-                conn.client_audio_buffer = []
-            if not hasattr(conn, 'client_have_voice'):
-                conn.client_have_voice = False
-            if not hasattr(conn, 'client_have_voice_last_time'):
-                conn.client_have_voice_last_time = 0
-            if not hasattr(conn, 'client_voice_stop'):
-                conn.client_voice_stop = False
-            # 将新数据加入缓冲区
-            conn.client_audio_buffer.extend(audio_data)
-            # 处理缓冲区中的完整帧（每次处理512采样点，每个采样点2字节）
-            client_have_voice = False
-            while len(conn.client_audio_buffer) >= 512 * 2:
-                # 提取前512个采样点（1024字节）
-                chunk = bytes(conn.client_audio_buffer[:512 * 2])
-                conn.client_audio_buffer = conn.client_audio_buffer[512 * 2:]
-                # 转换为模型需要的格式
-                audio_int16 = np.frombuffer(chunk, dtype=np.int16)
-                audio_float32 = audio_int16.astype(np.float32) / 32768.0
-                # 检测语音活动
-                with torch.no_grad():
-                    speech_prob = self.model(torch.from_numpy(audio_float32), 16000).item()
-                client_have_voice = speech_prob >= self.vad_threshold
-                # 更新VAD状态
-                if client_have_voice:
-                    conn.client_have_voice = True
-                    conn.client_have_voice_last_time = time.time() * 1000
-                    conn.client_voice_stop = False
-                elif conn.client_have_voice:
-                    # 检查静默时间
-                    stop_duration = time.time() * 1000 - conn.client_have_voice_last_time
-                    if stop_duration >= self.silence_threshold_ms:
-                        conn.client_have_voice = False
-                        conn.client_voice_stop = True
-            return client_have_voice
-        except Exception as e:
-            print(f"VAD detection error: {e}")
-            return False
-    def reset_states(self):
-        """Reset VAD states"""
-        pass  # States are now stored in conn object, no need to reset here
+        """Factory method to create VAD instance - always returns EnergyVADProvider"""
+        # 忽略vad_mini_volume, vad_mini_length, vad_silence_timeout参数
+        # 将vad_pause_timeout作为min_silence_duration_ms传递
+        return EnergyVADProvider(
+            threshold=threshold,
+            min_silence_duration_ms=vad_pause_timeout
+        )
 class EnergyVADProvider(VADProviderBase):
-    """Audio energy-based VAD provider (fallback solution)"""
+    """Audio energy-based VAD provider"""
     def __init__(self, threshold=0.02, min_silence_duration_ms=300):
         print(f"Initializing EnergyVAD: threshold={threshold}, min_silence_duration_ms={min_silence_duration_ms}")
         # 初始化VAD配置，使用默认值作为备选

smartpi/onnx_pose_workflow.py CHANGED Viewed

@@ -5,7 +5,7 @@ import onnx
 import json
 import cv2
 import time  # 用于实时耗时计算
-from lib.posenet_utils import get_posenet_output
+from posenet_utils import get_posenet_output
 class PoseWorkflow:

smartpi/rknn_pose_workflow.py CHANGED Viewed

@@ -5,7 +5,7 @@ import json
 import os
 import time
 from rknnlite.api import RKNNLite  # RKNN核心库
-from lib.posenet_utils import get_posenet_output  # 姿态关键点提取逻辑（需支持返回3个值）
+from posenet_utils import get_posenet_output  # 姿态关键点提取逻辑（需支持返回3个值）
 class PoseWorkflow:

{smartpi-0.1.41.dist-info → smartpi-0.1.42.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: smartpi
-Version: 0.1.41
+Version: 0.1.42
 Summary: A library use for H2-RCU
 Author: ZMROBO
 Classifier: Programming Language :: Python :: 3

{smartpi-0.1.41.dist-info → smartpi-0.1.42.dist-info}/RECORD RENAMED Viewed

@@ -1,9 +1,9 @@
-smartpi/__init__.py,sha256=ioz08SpgjM2Df14M57OTfaAqxYbI-A6h2BeKEQ--8GI,356
+smartpi/__init__.py,sha256=7AdxZqsEjhv8Ht_bLXOoF3T3jGZ0TEnG-SVYdjs4NR0,356
 smartpi/_gui.py,sha256=ij-6HZAEIwdy_hvU7f0NkyQjx_-eephijlKbGUhf8Uo,2177
-smartpi/ai_asr.py,sha256=eoF5z4oRJWpSk13tqO2M2N7ZBCkNWHPhQDt4-uUo_ko,40079
+smartpi/ai_asr.py,sha256=wxh_1Klh8vJottAt19jq3qpOmM_Cw4DHQEPKpsHGhmE,40063
 smartpi/ai_llm.py,sha256=-khBK2PMwbYUDvbaCTRCktE4dFloqfai4mHI-V8GEXM,36751
 smartpi/ai_tts.py,sha256=OjAJs3XOykiufXuEDiaD0coKsWjknPdhwRpRzGLDZKU,36061
-smartpi/ai_vad.py,sha256=RJBieDmH5A0FITfULhpcn6Vpy_NhVXK5s416D-Lm2nU,8983
+smartpi/ai_vad.py,sha256=9J1xxuKF8GvojkL_JuXv7_xE8bQAo5D49Xasi8piKK8,3630
 smartpi/base_driver.py,sha256=nGkGp7cAgeNo7BijHMJKHfdVZKIKuZNTx9GqLFPP5TI,31486
 smartpi/camera.py,sha256=AVpZsMpW-24CP3BOfarjmRawMJdTOZY7Crq7FeLOqb4,3292
 smartpi/color_sensor.py,sha256=ckIXD81YnqPo6nENAnipNp3gY12FJ235QKj0e8Cul9E,521
@@ -20,13 +20,13 @@ smartpi/motor.py,sha256=DvFzREEzyRafGmSCagU6ASeoE8fnAsKYI4oYMrkXXJc,5351
 smartpi/move.py,sha256=s1ZnkFtp6SCBnxhBxp6qQjGbifdsY5hjVCwlarTsZto,6688
 smartpi/onnx_hand_workflow.py,sha256=ZCoaWC6GygZSrhM6jhsuB6qmQ6GiAFFrso6rKAGmue8,8157
 smartpi/onnx_image_workflow.py,sha256=-saM_NxR6yDz06xlWZOvHf6cq3zmtOCFhCyZTGqvuOk,6188
-smartpi/onnx_pose_workflow.py,sha256=7hoZ31XfZRAbgmdQbgfK-xePniMa5mDEggV12F-Uq5c,20970
+smartpi/onnx_pose_workflow.py,sha256=w0zGlXXBgIcxwjLF7eYHPEePqRw3KXvKQUqzVd_UrzQ,20966
 smartpi/onnx_text_workflow.py,sha256=6l9MTT2T1-rNye3_dSHLI2U749Z94aoRdkSe6CNXfHw,7191
 smartpi/onnx_voice_workflow.py,sha256=jkMFzy3RUnLo8LZAuCUfsS3YCJWSZzZuiE4RFoQ2HZw,17440
 smartpi/posenet_utils.py,sha256=o3scK41Eqvftav4y4vp6_6HinQWNCLeLpArXAzqQ-7s,8983
 smartpi/rknn_hand_workflow.py,sha256=wsVN_PYP9M-1AFaN4yqrGksUBoamYfujW0nQq4nv3kU,10160
 smartpi/rknn_image_workflow.py,sha256=4lTtcdmQ9KN5WiEnHayvqAd-dA0tiap5YXIqAMn5SoI,18444
-smartpi/rknn_pose_workflow.py,sha256=LA6tXOI81R1IQhQvgBWLGV_I8Qa-ROUgXqj3kTEMfmc,27840
+smartpi/rknn_pose_workflow.py,sha256=T09F9vcQ6-QG3PlFRpCI4AuTPzEl_s0C38wfMD21Ujo,27836
 smartpi/rknn_text_workflow.py,sha256=KNBSetj3tmlLxdZOm0yzbiDnjH8S5191fuxh5Mi-uCY,9632
 smartpi/rknn_voice_workflow.py,sha256=T8iRQWPtJYXqoHIZH2FiT1WLxwN3HQg4D-mg-5KvYdA,16326
 smartpi/servo.py,sha256=0p09Jk-IVk5nLXz2AqFvytiYSSe4sMxdy1FaNMQijoY,5770
@@ -70,7 +70,7 @@ smartpi/text_gte_model/config/tokenizer_config.json,sha256=w5RiDifbeIYy6vyGX5v94
 smartpi/text_gte_model/config/vocab.txt,sha256=oi9hP3uz_8h8XoHNh6rgLnVdJbIEm75zKoSKM8HzsC8,84758
 smartpi/text_gte_model/gte/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 smartpi/text_gte_model/gte/gte_model.onnx,sha256=XXYg6TUhzOx1SqAhp6ePDU0QgeK6DQEqHATMuQQJCNE,30468366
-smartpi-0.1.41.dist-info/METADATA,sha256=446DzYjpDU_MxEdwmQyiyx7pf6K-OAQIOW3sek2gnZU,614
-smartpi-0.1.41.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
-smartpi-0.1.41.dist-info/top_level.txt,sha256=PoLhUCmWAiQUg5UeN2fS-Y1iQyBbF2rdUlizXtpHGRQ,8
-smartpi-0.1.41.dist-info/RECORD,,
+smartpi-0.1.42.dist-info/METADATA,sha256=FxIXEB2WMTsKc8Fs8ukVfdnD3--vqy7fOEXFT5AQLq8,614
+smartpi-0.1.42.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+smartpi-0.1.42.dist-info/top_level.txt,sha256=PoLhUCmWAiQUg5UeN2fS-Y1iQyBbF2rdUlizXtpHGRQ,8
+smartpi-0.1.42.dist-info/RECORD,,

{smartpi-0.1.41.dist-info → smartpi-0.1.42.dist-info}/WHEEL RENAMED Viewed

File without changes

{smartpi-0.1.41.dist-info → smartpi-0.1.42.dist-info}/top_level.txt RENAMED Viewed

File without changes

smartpi 0.1.41__py3-none-any.whl → 0.1.42__py3-none-any.whl

smartpi 0.1.41__py3-none-any.whl → 0.1.42__py3-none-any.whl