smartpi 0.1.35__py3-none-any.whl → 0.1.36__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- smartpi/__init__.py +1 -1
- smartpi/camera.py +84 -0
- smartpi/onnx_hand_workflow.py +201 -0
- smartpi/onnx_image_workflow.py +176 -0
- smartpi/onnx_pose_workflow.py +482 -0
- smartpi/onnx_text_workflow.py +173 -0
- smartpi/onnx_voice_workflow.py +437 -0
- smartpi/posenet_utils.py +222 -0
- smartpi/rknn_hand_workflow.py +245 -0
- smartpi/rknn_image_workflow.py +405 -0
- smartpi/rknn_pose_workflow.py +592 -0
- smartpi/rknn_text_workflow.py +240 -0
- smartpi/rknn_voice_workflow.py +394 -0
- {smartpi-0.1.35.dist-info → smartpi-0.1.36.dist-info}/METADATA +1 -1
- smartpi-0.1.36.dist-info/RECORD +32 -0
- smartpi-0.1.35.dist-info/RECORD +0 -20
- {smartpi-0.1.35.dist-info → smartpi-0.1.36.dist-info}/WHEEL +0 -0
- {smartpi-0.1.35.dist-info → smartpi-0.1.36.dist-info}/top_level.txt +0 -0
smartpi/__init__.py
CHANGED
@@ -4,5 +4,5 @@ from .base_driver import P1, P2, P3, P4, P5, P6, M1, M2, M3, M4, M5, M6
 __all__ = ["base_driver","gui","ultrasonic","touch_sensor","temperature","humidity","light_sensor","color_sensor","motor","servo","led","flash",
            "P1", "P2", "P3", "P4", "P5", "P6", "M1", "M2", "M3", "M4", "M5", "M6"]

-__version__ = "0.1.35"
+__version__ = "0.1.36"
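For context, __init__.py re-exports the port and motor aliases from base_driver, so user code can import them from the package root. A minimal sketch based only on the names visible in the diff above:

import smartpi
from smartpi import P1, M1  # aliases listed in __all__

print(smartpi.__version__)  # "0.1.36" after this release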
smartpi/camera.py
ADDED
@@ -0,0 +1,84 @@
# coding: utf-8
import cv2
import os
import time
import platform

class Camera:
    def __init__(self, indexes=[0, 1, 2, 3], target_width=640, target_height=480):
        self.cap = None
        self.indexes = indexes
        self.target_width = target_width
        self.target_height = target_height
        self.open_camera()

    def open_camera(self):
        """Open the camera (hardware acceleration + parameter tuning)."""
        for idx in self.indexes:
            try:
                # Use V4L2 hardware acceleration on Linux/Android (preferred on RK chips)
                if platform.system() == "Linux":
                    cap = cv2.VideoCapture(idx, cv2.CAP_V4L2)
                    # Try to enable hardware acceleration (compatible with different OpenCV versions)
                    try:
                        # For newer OpenCV versions
                        if hasattr(cv2, 'CAP_PROP_HW_ACCELERATION') and hasattr(cv2, 'VIDEO_ACCELERATION_ANY'):
                            cap.set(cv2.CAP_PROP_HW_ACCELERATION, cv2.VIDEO_ACCELERATION_ANY)
                    except AttributeError as ae:
                        print(f"Hardware acceleration not supported, using defaults: {ae}")
                else:
                    cap = cv2.VideoCapture(idx)

                if cap.isOpened():
                    # Try to set the resolution
                    cap.set(cv2.CAP_PROP_FRAME_WIDTH, self.target_width)
                    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, self.target_height)

                    # Read back the resolution that was actually applied
                    actual_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                    actual_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

                    print(f"Camera {idx} opened, resolution: {actual_width}x{actual_height}")
                    self.cap = cap
                    return True
            except Exception as e:
                print(f"Failed to open camera {idx}: {e}")
                continue

        print("Unable to open any camera")
        return False

    def read_frame(self):
        """Read one frame, handling errors automatically."""
        if not self.cap or not self.cap.isOpened():
            return False, None

        ret, frame = self.cap.read()
        if not ret:
            print("Frame read failed, trying to reopen the camera...")
            self.release()
            time.sleep(1)
            if self.open_camera():
                return self.read_frame()
            return False, None

        # Resize to the target resolution
        if frame.shape[1] != self.target_width or frame.shape[0] != self.target_height:
            frame = cv2.resize(frame, (self.target_width, self.target_height))

        return True, frame

    def get_resolution(self):
        """Return the current resolution."""
        if self.cap and self.cap.isOpened():
            return (
                int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            )
        return self.target_width, self.target_height

    def release(self):
        """Release the camera resource."""
        if self.cap and self.cap.isOpened():
            self.cap.release()
        self.cap = None
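The new Camera class retries failed reads by reopening the device and always hands back frames at the target resolution. A minimal capture sketch using only the methods defined above; the single device index and the 640x480 target are assumptions:

from smartpi.camera import Camera

cam = Camera(indexes=[0], target_width=640, target_height=480)
try:
    ok, frame = cam.read_frame()  # (False, None) if the camera cannot be (re)opened
    if ok:
        print("frame shape:", frame.shape)  # BGR numpy array, 480x640x3
finally:
    cam.release()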
smartpi/onnx_hand_workflow.py
ADDED
@@ -0,0 +1,201 @@
import cv2
import numpy as np
import onnxruntime as ort
import mediapipe as mp
import json
from PIL import Image
import time  # for timing measurements

class GestureWorkflow:
    def __init__(self, model_path):
        # Initialize MediaPipe Hands
        self.mp_hands = mp.solutions.hands
        self.hands = self.mp_hands.Hands(
            static_image_mode=False,  # video-stream mode; set to True if you only extract keypoints from still photos
            max_num_hands=1,  # set to 2 to detect both hands
            min_detection_confidence=0.5,  # confidence threshold for hand keypoints
            model_complexity=0  # simplest model; if results are inaccurate, consider the more complex model (1)
        )

        # Initialize metadata
        self.min_vals = None
        self.max_vals = None
        self.class_labels = None

        # Load the model and its metadata
        self.load_model(model_path)

    def load_model(self, model_path):
        """Load the model and parse its metadata."""
        # Initialize the ONNX Runtime session
        self.session = ort.InferenceSession(model_path)

        # Load the metadata
        self._load_metadata()

    def _load_metadata(self):
        """Load normalization parameters and class labels from the ONNX model metadata."""
        model_meta = self.session.get_modelmeta()

        # Check whether custom_metadata_map exists
        if hasattr(model_meta, 'custom_metadata_map'):
            metadata = model_meta.custom_metadata_map
            if 'minMaxValues' in metadata:
                min_max_data = json.loads(metadata['minMaxValues'])
                self.min_vals = min_max_data.get('min')
                self.max_vals = min_max_data.get('max')

            if 'classes' in metadata:
                class_labels = json.loads(metadata['classes'])
                self.class_labels = list(class_labels.values()) if isinstance(class_labels, dict) else class_labels
        else:
            # Older ONNX Runtime versions expose metadata_props instead
            for prop in model_meta.metadata_props:
                if prop.key == 'minMaxValues':
                    min_max_data = json.loads(prop.value)
                    self.min_vals = min_max_data.get('min')
                    self.max_vals = min_max_data.get('max')
                elif prop.key == 'classes':
                    class_labels = json.loads(prop.value)
                    self.class_labels = list(class_labels.values()) if isinstance(class_labels, dict) else class_labels

        # Fall back to the default labels (thumbs up, thumbs down, victory, fist, I-love-you, palm)
        if self.class_labels is None:
            self.class_labels = ["点赞", "点踩", "胜利", "拳头", "我爱你", "手掌"]

    def preprocess_image(self, image, target_width=224, target_height=224):
        """
        Preprocess an image: scale it while keeping the aspect ratio, then center it
        on a canvas of the target size. Returns the processed OpenCV image (BGR).
        """
        # Convert the OpenCV image to PIL format
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        pil_image = Image.fromarray(image_rgb)

        # Compute the scale factor
        width, height = pil_image.size
        scale = min(target_width / width, target_height / height)

        # Compute the new size and placement
        new_width = int(width * scale)
        new_height = int(height * scale)
        x = (target_width - new_width) // 2
        y = (target_height - new_height) // 2

        # Create a white canvas and paste the resized image onto it
        canvas = Image.new('RGB', (target_width, target_height), (255, 255, 255))
        resized_image = pil_image.resize((new_width, new_height), Image.Resampling.LANCZOS)
        canvas.paste(resized_image, (x, y))

        # Convert back to OpenCV format
        processed_image = np.array(canvas)
        return cv2.cvtColor(processed_image, cv2.COLOR_RGB2BGR)

    def extract_hand_keypoints(self, image):
        """Extract hand keypoints from an image."""
        # Convert the image to RGB and run detection
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = self.hands.process(image_rgb)

        if results.multi_hand_landmarks:
            # Use only the first detected hand
            landmarks = results.multi_hand_world_landmarks[0]

            # Extract the keypoint coordinates
            keypoints = []
            for landmark in landmarks.landmark:
                keypoints.extend([landmark.x, landmark.y, landmark.z])

            return np.array(keypoints, dtype=np.float32)
        return None

    def normalize_keypoints(self, keypoints):
        """Normalize the keypoint data."""
        if self.min_vals is None or self.max_vals is None:
            return keypoints  # no normalization parameters; return the raw data

        normalized = []
        for i, value in enumerate(keypoints):
            if i < len(self.min_vals) and i < len(self.max_vals):
                min_val = self.min_vals[i]
                max_val = self.max_vals[i]
                if max_val - min_val > 0:
                    normalized.append((value - min_val) / (max_val - min_val))
                else:
                    normalized.append(0)
            else:
                normalized.append(value)

        return np.array(normalized, dtype=np.float32)

    def predict_frame(self, frame):
        """Run gesture classification directly on an image frame."""
        # Record the start time
        start_time = time.time()
        # Preprocess the image
        processed_image = self.preprocess_image(frame, 224, 224)

        # Extract the keypoints
        keypoints = self.extract_hand_keypoints(processed_image)
        min_time = time.time()
        hand_time = min_time - start_time
        #print(f"Keypoint extraction took {hand_time:.4f}s")
        if keypoints is None:
            return None, {"error": "No hand detected"}

        # Normalize the keypoints
        normalized_kps = self.normalize_keypoints(keypoints)

        # Prepare the ONNX input
        input_data = normalized_kps.reshape(1, -1).astype(np.float32)

        # Run inference
        input_name = self.session.get_inputs()[0].name
        outputs = self.session.run(None, {input_name: input_data})
        predictions = outputs[0][0]

        # Read out the prediction
        class_id = np.argmax(predictions)
        confidence = float(predictions[class_id])

        # Look up the class label
        label = self.class_labels[class_id] if class_id < len(self.class_labels) else f"Unknown class {class_id}"
        end_time = time.time()
        all_time = end_time - start_time
        onnx_time = end_time - min_time
        print(f"ONNX inference took {onnx_time:.4f}s")
        print(f"Total time: {all_time:.4f}s")
        # Return the raw result and a formatted result
        raw_result = predictions.tolist()
        formatted_result = {
            'class': label,
            'confidence': confidence,
            'class_id': class_id,
            'probabilities': raw_result
        }

        return raw_result, formatted_result

    # Kept for backwards compatibility with older code
    def predict(self, image_path):
        """Run gesture classification on an image file."""
        try:
            # Read the image with PIL to avoid libpng version issues
            pil_image = Image.open(image_path)
            # Convert to RGB
            rgb_image = pil_image.convert('RGB')
            # Convert to a numpy array
            image_array = np.array(rgb_image)
            # Convert to BGR (the format OpenCV uses)
            image = cv2.cvtColor(image_array, cv2.COLOR_RGB2BGR)

            if image is None:
                raise ValueError(f"Unable to read image: {image_path}")

            return self.predict_frame(image)
        except Exception as e:
            # If PIL fails, fall back to cv2
            image = cv2.imread(image_path)
            if image is None:
                raise ValueError(f"Unable to read image: {image_path}")
            return self.predict_frame(image)
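A short sketch combining GestureWorkflow with the Camera class added earlier in this diff. The path gesture.onnx is a placeholder, and the model is assumed to carry the minMaxValues/classes metadata that _load_metadata reads:

from smartpi.camera import Camera
from smartpi.onnx_hand_workflow import GestureWorkflow

workflow = GestureWorkflow("gesture.onnx")  # hypothetical model path
cam = Camera()
ok, frame = cam.read_frame()
if ok:
    raw, result = workflow.predict_frame(frame)
    if raw is None:
        print(result["error"])  # e.g. "No hand detected"
    else:
        print(result["class"], result["confidence"])
cam.release()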
smartpi/onnx_image_workflow.py
ADDED
@@ -0,0 +1,176 @@
import onnxruntime as ort
import numpy as np
from PIL import Image
import onnx
import cv2
import time

class ImageWorkflow:
    def __init__(self, model_path=None):
        self.session = None
        self.classes = []
        self.metadata = {}
        self.input_shape = [1, 224, 224, 3]  # default input shape

        if model_path:
            self.load_model(model_path)

    def load_model(self, model_path):
        """Load the model and parse its metadata."""
        try:
            # Read the ONNX metadata
            onnx_model = onnx.load(model_path)
            for meta in onnx_model.metadata_props:
                self.metadata[meta.key] = meta.value

            # Parse the class labels
            if 'classes' in self.metadata:
                self.classes = eval(self.metadata['classes'])

            # Initialize the inference session
            self.session = ort.InferenceSession(model_path)
            self._parse_input_shape()

        except Exception as e:
            print(f"Model loading failed: {e}")

    def _parse_input_shape(self):
        """Automatically parse the input shape."""
        input_info = self.session.get_inputs()[0]
        shape = []
        for dim in input_info.shape:
            # Replace dynamic dimensions with 1
            shape.append(1 if isinstance(dim, str) or dim < 0 else int(dim))
        self.input_shape = shape

    def _preprocess(self, image_path):
        """Standard preprocessing pipeline."""
        try:
            img = Image.open(image_path).convert("RGB")

            # Get the target size (assuming a shape of [N, H, W, C])
            _, target_h, target_w, _ = self.input_shape

            # Resize
            img = img.resize((target_w, target_h), Image.BILINEAR)

            # Convert to a numpy array and normalize
            img_array = np.array(img).astype(np.float32) / 255.0

            # Add the batch dimension
            return np.expand_dims(img_array, axis=0)

        except Exception as e:
            print(f"Image preprocessing failed: {e}")
            return None

    def inference(self, data, model_path=None):
        """Run inference."""
        if model_path and not self.session:
            self.load_model(model_path)

        input_data = self._preprocess(data)
        if input_data is None:
            return None, None

        try:
            # Run inference
            outputs = self.session.run(None, {self.session.get_inputs()[0].name: input_data})
            raw = outputs[0][0]  # assumes an output shape of [1, n_classes]

            # Format the output
            formatted = self._format_result(raw)

            return raw, formatted

        except Exception as e:
            print(f"Inference failed: {e}")
            return None, None

    def inference_frame(self, frame_data, model_path=None):
        """Run inference directly on frame data, with no file IO.

        Returns: raw, formatted
        The formatted dict contains: class, confidence, probabilities, preprocess_time, inference_time
        """
        if model_path and not self.session:
            self.load_model(model_path)

        # Measure preprocessing time
        preprocess_start = time.time()
        input_data = self._preprocess_frame(frame_data)
        preprocess_time = time.time() - preprocess_start

        if input_data is None:
            return None, None

        try:
            # Measure inference time
            inference_start = time.time()
            # Run inference
            outputs = self.session.run(None, {self.session.get_inputs()[0].name: input_data})
            inference_time = time.time() - inference_start

            raw = outputs[0][0]  # assumes an output shape of [1, n_classes]

            # Format the output
            formatted = self._format_result(raw)
            # Add timing information to the result
            formatted['preprocess_time'] = preprocess_time
            formatted['inference_time'] = inference_time

            # Compute the total time
            total_time = preprocess_time + inference_time
            print(f"Frame inference took {total_time:.4f}s - result: {formatted['class']} ({formatted['confidence']}%)")
            return raw, formatted

        except Exception as e:
            print(f"Frame inference failed: {e}")
            return None, None

    def _preprocess_frame(self, frame_data):
        """Preprocessing pipeline for frame data."""
        try:
            # Ensure the input is a numpy array
            if not isinstance(frame_data, np.ndarray):
                print("Error: frame data must be a numpy array")
                return None

            # Frames read by OpenCV are BGR; convert to RGB
            img = cv2.cvtColor(frame_data, cv2.COLOR_BGR2RGB)

            # Get the target size (assuming a shape of [N, H, W, C])
            _, target_h, target_w, _ = self.input_shape

            # Resize
            img = cv2.resize(img, (target_w, target_h), interpolation=cv2.INTER_LINEAR)

            # Convert to float and normalize
            img_array = img.astype(np.float32) / 255.0

            # Add the batch dimension
            return np.expand_dims(img_array, axis=0)

        except Exception as e:
            print(f"Frame preprocessing failed: {e}")
            return None

    def _format_result(self, predictions):
        """Produce standardized output."""
        class_idx = np.argmax(predictions)
        confidence = int(predictions[class_idx] * 100)

        return {
            'class': self.classes[class_idx] if self.classes else str(class_idx),
            'confidence': confidence,
            'probabilities': predictions.tolist()
        }

# Usage example
if __name__ == "__main__":
    # Preload the model
    model = ImageWorkflow("model.onnx")

    # Run inference on frame data
    # assuming frame is a frame captured via cv2
    # raw, res = model.inference_frame(frame)
    # print(f"Result: {res['class']} ({res['confidence']}%)")
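The built-in example above covers only the frame path; the file-based inference entry point behaves the same way. A sketch, with placeholder model and image paths:

from smartpi.onnx_image_workflow import ImageWorkflow

model = ImageWorkflow("model.onnx")     # hypothetical model path
raw, res = model.inference("test.jpg")  # loads, resizes, and normalizes the image internally
if res is not None:
    print(f"Result: {res['class']} ({res['confidence']}%)")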