PyPI - openvisionkit - Versions diffs - 0.4.0__py3-none-any.whl - Mend

openvisionkit 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

openvisionkit/__init__.py +1 -0
openvisionkit/_version.py +24 -0
openvisionkit/capture/draw_object.py +296 -0
openvisionkit/capture/image_template.py +61 -0
openvisionkit/capture/screen_capture.py +13 -0
openvisionkit/capture/video_recorder.py +128 -0
openvisionkit/capture/video_template.py +336 -0
openvisionkit/lib/classifier.py +186 -0
openvisionkit/lib/face_detector.py +587 -0
openvisionkit/lib/face_mesh_detector.py +913 -0
openvisionkit/lib/form_detector.py +465 -0
openvisionkit/lib/form_roi_annotator.py +679 -0
openvisionkit/lib/form_roi_detector.py +1078 -0
openvisionkit/lib/fps_counter.py +38 -0
openvisionkit/lib/hair_segmentation.py +298 -0
openvisionkit/lib/hand_detector.py +1230 -0
openvisionkit/lib/image_detector.py +1095 -0
openvisionkit/lib/object_detector.py +401 -0
openvisionkit/lib/pose_detector.py +919 -0
openvisionkit/lib/selfie_segmentation.py +528 -0
openvisionkit/lib/text_detector.py +1229 -0
openvisionkit/utility/live_plot.py +141 -0
openvisionkit/utility/vision_utilis.py +871 -0
openvisionkit-0.4.0.dist-info/METADATA +1018 -0
openvisionkit-0.4.0.dist-info/RECORD +26 -0
openvisionkit-0.4.0.dist-info/WHEEL +4 -0

openvisionkit/capture/video_template.py ADDED Viewed

@@ -0,0 +1,336 @@
+import contextlib
+import ctypes
+import time
+from collections.abc import Callable
+from datetime import datetime
+from pathlib import Path
+import cv2
+from openvisionkit.capture.video_recorder import VideoRecorder
+from openvisionkit.lib.fps_counter import FPSCounter
+with contextlib.suppress(Exception):
+    ctypes.windll.user32.SetProcessDPIAware()
class KeyEventManager:
    """Registry that maps keyboard key codes to callbacks.

    Handlers live in ``self.handlers``, keyed by integer key code, and are
    invoked as ``callback(frame, state)``.
    """

    def __init__(self):
        # key code (int) -> callback(frame, state)
        self.handlers = {}

    def register(self, key, callback):
        """Register ``callback`` for ``key``, replacing any previous handler.

        Args:
            key: Integer key code such as ``ord('r')``, or a single-character
                string such as ``'r'`` (converted with ``ord``).
            callback: Callable invoked as ``callback(frame, state)``.

        Raises:
            ValueError: If ``key`` is a string that is not exactly one
                character long.
            TypeError: If ``callback`` is not callable.
        """
        if isinstance(key, str):
            if len(key) != 1:
                raise ValueError(
                    f"key must be a single character or an int, got {key!r}"
                )
            key = ord(key)
        # Fail at registration time rather than later, in the middle of the
        # frame loop, when the key is first pressed.
        if not callable(callback):
            raise TypeError(f"callback must be callable, got {callback!r}")
        self.handlers[key] = callback

    def handle(self, key, frame, state):
        """Call the handler registered for ``key``; do nothing if none exists.

        Args:
            key: Integer key code to dispatch on.
            frame: Passed through to the handler as its first argument.
            state: Passed through to the handler as its second argument.
        """
        try:
            callback = self.handlers[key]
        except KeyError:
            return
        callback(frame, state)
def save_screenshot(frame, output_dir="screenshots", prefix="capture"):
    """Save a single frame as a timestamped PNG file.

    Args:
        frame (numpy.ndarray): Image to save; passed unchanged to
            ``cv2.imwrite``.
        output_dir (str): Directory where the file is written. Created
            (including parents) if absent. Default is 'screenshots'.
        prefix (str): Filename prefix before the timestamp. Default is
            'capture'.

    Returns:
        str: Path of the PNG file, built from ``output_dir`` (so it is
        relative when ``output_dir`` is relative). The path is returned even
        when the write fails; in that case an error message is printed
        instead of the success message.
    """
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)
    # Microsecond resolution (%f) keeps names unique for rapid captures.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
    filename = target_dir / f"{prefix}_{timestamp}.png"
    # cv2.imwrite can signal failure by returning False instead of raising,
    # so only announce success when it actually reports one.
    if cv2.imwrite(str(filename), frame):
        print(f"📸 Screenshot saved: {filename}")
    else:
        print(f"Error: Could not save screenshot to '{filename}'")
    return str(filename)
def video_capture_template(
    video_source: int | str = 0,
    loop_forever: bool = True,
    custom_logic: Callable[[cv2.typing.MatLike], cv2.typing.MatLike] | None = None,
    state: dict | None = None,
    key_manager: KeyEventManager | None = None,
    window_name: str = "Demo",
    show_window: bool = True,
    resolution: tuple[int, int] = (1280, 720),
    center_window: bool = True,
    draw_fps: bool = True,
    fps=15,
    # MOUSE CALLBACK OPTION
    mouse_callback: Callable | None = None,
    mouse_callback_params: dict | None = None,
    # VIDEO RECORDING OPTIONS
    enable_auto_recording: bool = False,
    enable_manual_recording: bool = False,
    record_format="mp4",  # "mp4" | "gif"
    # SCREENSHOT OPTIONS
    enable_screenshot: bool = False,
    screenshot_output_dir: str = "screenshots",
    screenshot_prefix: str = "capture",
    auto_screenshot_after_seconds: float | None = None,
    auto_screenshot_repeat: bool = False,
):
    """Reusable capture/display loop for OpenCV video demos.

    Opens ``video_source``, then for every frame: runs ``custom_logic``,
    optionally overlays FPS, records, takes screenshots, shows the window and
    dispatches key presses. The loop ends on ESC or when a frame cannot be
    read. The capture device, windows and any active recorders are released
    even if an exception is raised inside the loop.

    How to use:
        1. Write a function that takes a frame and returns the processed frame.
        2. Call this template with the video source and that function.

    Parameters:
        video_source (int or str):
            - int (e.g. 0, 1, 2...) -> camera index
            - str -> path to a video file (mp4, avi, etc.)
        loop_forever (bool): If True, rewinds to the first frame once the
            read position reaches the reported frame count. Sources that
            report no positive frame count are never rewound. Default = True
        custom_logic (callable, optional): Receives the frame and returns the
            modified frame. Put your own per-frame logic here.
        state (dict, optional): Dictionary passed to key handlers so they can
            persist information across frames and key events, e.g.
            ``state = {'score': [0, 0], 'game_over': False}``. A new empty
            dict is used when None. Default = None
        key_manager (KeyEventManager, optional): Its ``handle(key, frame,
            state)`` is called on every loop iteration. Default = None
        window_name (str): Name of the OpenCV window. Default = "Demo"
        show_window (bool): If True, displays the video window. Default = True
        resolution (tuple[int, int]): Requested capture resolution
            (width, height); also used as the window size.
            Default = (1280, 720)
        center_window (bool): If True, centers the window on screen once,
            using pyautogui for the screen size; silently skipped if that
            fails. Default = True
        draw_fps (bool): If True, passes each frame through FPSCounter, which
            also supplies the measured FPS. Default = True
        fps: Recording frame rate used when ``draw_fps`` is False. When
            ``draw_fps`` is True the measured FPS is used instead. A value
            that is not greater than 0 falls back to 10. Default = 15

        # MOUSE CALLBACK OPTION
        mouse_callback (callable, optional): Installed on the window with
            ``cv2.setMouseCallback`` (only when ``show_window`` is True).
            Default = None
        mouse_callback_params (dict, optional): Extra parameter passed to the
            mouse callback. Default = None

        # VIDEO RECORDING OPTIONS
        enable_auto_recording (bool): If True, records every frame from the
            first one on. Default = False
        enable_manual_recording (bool): If True, 'r' / 'R' toggles recording
            on and off; each start opens a fresh recorder. Default = False
        record_format (str): Recording output format ("mp4" or "gif").
            Default = "mp4"

        # SCREENSHOT OPTIONS
        enable_screenshot (bool): If True, 's' / 'S' saves a screenshot and
            the automatic screenshot options below are honored.
            Default = False
        screenshot_output_dir (str): Directory where screenshots are saved.
            Default = "screenshots"
        screenshot_prefix (str): Prefix for screenshot filenames.
            Default = "capture"
        auto_screenshot_after_seconds (float, optional): If set, a screenshot
            is taken automatically after this many seconds.
            Default = None (disabled)
        auto_screenshot_repeat (bool): If True and
            ``auto_screenshot_after_seconds`` is set, a screenshot is taken
            every time that interval elapses instead of only once.
            Default = False

    Returns:
        None.

    Usage:
        Repeated auto screenshots every 5 seconds:

            video_capture_template(
                video_source=0,
                custom_logic=my_logic,
                enable_screenshot=True,
                auto_screenshot_after_seconds=5,
                auto_screenshot_repeat=True,
            )

        Manual screenshots with the 's' key:

            video_capture_template(
                video_source=0,
                custom_logic=my_logic,
                enable_screenshot=True,
                auto_screenshot_after_seconds=None,
                auto_screenshot_repeat=False,
            )
    """
    cap = cv2.VideoCapture(video_source)
    if not cap.isOpened():
        print(f"Error: Could not open video source '{video_source}'")
        return

    def _start_recorder(label, frame_shape, measured_fps):
        # Build and start a recorder; fall back to 10 FPS when no usable
        # frame rate is known yet.
        safe_fps = measured_fps if measured_fps and measured_fps > 0 else 10
        print(f"Initializing {label} with FPS:", safe_fps)
        recorder = VideoRecorder(output_format=record_format, fps=safe_fps)
        recorder.start(frame_shape)
        return recorder

    # Recorder handles are bound before the try block so that the cleanup in
    # `finally` can always inspect them. They are only assigned after start()
    # succeeded, so cleanup never stops a recorder that was never started.
    auto_recorder: VideoRecorder | None = None
    manual_recorder: VideoRecorder | None = None
    manual_recording = False  # True while the user is recording

    try:
        frame_width, frame_height = resolution
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, frame_width)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, frame_height)

        window_centered = False
        first_frame_rendered = False
        if state is None:
            state = {}

        if draw_fps:
            fps_counter = FPSCounter()
        current_fps = fps  # replaced by the measured value when draw_fps is True

        start_time = time.time()
        last_auto_screenshot_time = start_time
        auto_screenshot_done = False

        if show_window:
            cv2.namedWindow(window_name, cv2.WINDOW_NORMAL | cv2.WINDOW_GUI_EXPANDED)
            cv2.resizeWindow(window_name, frame_width, frame_height)
            if mouse_callback is not None:
                cv2.setMouseCallback(
                    window_name, mouse_callback, mouse_callback_params
                )

        while True:
            if loop_forever:
                # Only rewind sources that report a real frame count; without
                # this guard a source reporting 0 or less would be "rewound"
                # on every single iteration.
                total_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
                if (
                    total_frames > 0
                    and cap.get(cv2.CAP_PROP_POS_FRAMES) >= total_frames
                ):
                    cap.set(cv2.CAP_PROP_POS_FRAMES, 0)

            ret, frame = cap.read()
            if not ret:
                print("End of video stream or failed to read frame.")
                break

            if custom_logic is not None:
                frame = custom_logic(frame)
            if draw_fps:
                frame, current_fps = fps_counter.update(frame)

            # ── AUTO RECORDING ────────────────────────────────────────────
            if enable_auto_recording:
                if auto_recorder is None:
                    auto_recorder = _start_recorder(
                        "auto-recorder", frame.shape, current_fps
                    )
                # The frame is written before the "REC" overlay is drawn, so
                # the overlay does not end up in the recording.
                auto_recorder.write(frame)
                cv2.putText(
                    frame,
                    "REC (AUTO)",
                    (20, 80),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    1,
                    (0, 255, 0),
                    2,
                )

            # ── MANUAL RECORDING ──────────────────────────────────────────
            if enable_manual_recording and manual_recording:
                if manual_recorder is None:
                    # Created lazily on the first frame after R was pressed
                    manual_recorder = _start_recorder(
                        "manual recorder", frame.shape, current_fps
                    )
                manual_recorder.write(frame)
                cv2.putText(
                    frame,
                    "REC (MANUAL)",
                    (20, 120),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    1,
                    (0, 0, 255),
                    2,
                )

            # ── AUTO SCREENSHOT ───────────────────────────────────────────
            if enable_screenshot and auto_screenshot_after_seconds is not None:
                now = time.time()
                if auto_screenshot_repeat:
                    if now - last_auto_screenshot_time >= auto_screenshot_after_seconds:
                        save_screenshot(
                            frame,
                            output_dir=screenshot_output_dir,
                            prefix=screenshot_prefix,
                        )
                        last_auto_screenshot_time = now
                elif (
                    not auto_screenshot_done
                    and now - start_time >= auto_screenshot_after_seconds
                ):
                    save_screenshot(
                        frame,
                        output_dir=screenshot_output_dir,
                        prefix=screenshot_prefix,
                    )
                    auto_screenshot_done = True

            if show_window:
                cv2.imshow(window_name, frame)
                # Centering is attempted once, on the iteration after the
                # first frame has been shown.
                if center_window and not window_centered and first_frame_rendered:
                    try:
                        import pyautogui  # noqa: PLC0415

                        screen_width, screen_height = pyautogui.size()
                        x = (screen_width - frame_width) // 2
                        y = (screen_height - frame_height) // 2
                        cv2.moveWindow(window_name, x, y)
                    except Exception:
                        pass  # headless / no display — skip centering
                    window_centered = True
                first_frame_rendered = True

            key = cv2.waitKey(1) & 0xFF

            # ESC → exit
            if key == 27:
                print("Exiting cleanly...")
                break

            # Custom key handlers
            if key_manager:
                key_manager.handle(key, frame, state)

            # S → screenshot
            if enable_screenshot and key in [ord("s"), ord("S")]:
                save_screenshot(
                    frame, output_dir=screenshot_output_dir, prefix=screenshot_prefix
                )

            # R → toggle manual recording on/off
            if enable_manual_recording and key in [ord("r"), ord("R")]:
                manual_recording = not manual_recording
                if manual_recording:
                    # ── START ──────────────────────────────────────────
                    print("🎥 Manual recording: ON")
                    # Reset so a fresh recorder (new file) is created lazily
                    # on the next frame.
                    manual_recorder = None
                else:
                    # ── STOP ───────────────────────────────────────────
                    print("⏹️  Manual recording: OFF — saving…")
                    if manual_recorder is not None:
                        manual_recorder.stop()
                        manual_recorder = None
    finally:
        # ── CLEANUP ───────────────────────────────────────────────────────
        # Runs on normal exit and on exceptions alike, so the capture device
        # is released and recordings are finalized either way.
        cap.release()
        cv2.destroyAllWindows()
        if auto_recorder:
            print("Stopping auto-recorder…")
            auto_recorder.stop()
        if manual_recorder:
            print("Stopping manual recorder (cleanup)…")
            manual_recorder.stop()

openvisionkit/lib/classifier.py ADDED Viewed

@@ -0,0 +1,186 @@
+"""
+Simple Classifier for Teachable Machine .h5 models
+Works well with TensorFlow 2.15 / 2.16 on Apple Silicon
+"""
+import os
+import cv2
+import numpy as np
+import tensorflow as tf
+from tensorflow.keras.models import load_model
class Classifier:
    """Image classifier wrapping a Keras model and a text file of labels.

    Images are resized to 224x224 and scaled before being fed to the model.
    Confidence values returned by the helper methods are percentages
    (probability * 100).
    """

    def __init__(self, model_path: str, labels_path: str):
        """Load the model and its labels.

        Args:
            model_path: Path to the model file passed to ``load_model``.
            labels_path: Path to a UTF-8 text file with one label per line;
                blank lines are ignored.

        Raises:
            FileNotFoundError: If either path does not exist.
        """
        if not os.path.exists(model_path):
            raise FileNotFoundError(f"Model not found: {model_path}")
        if not os.path.exists(labels_path):
            raise FileNotFoundError(f"Labels not found: {labels_path}")
        print(f"Loading model: {model_path}")
        self.model = load_model(model_path, compile=False)
        with open(labels_path, encoding="utf-8") as f:
            self.labels = [line.strip() for line in f if line.strip()]
        print(f"Model loaded | TF {tf.__version__} | {len(self.labels)} labels")
        # Reusable single-image input batch; slot 0 is overwritten on every
        # predict() call, so it does not need to be initialized.
        self.data = np.empty(shape=(1, 224, 224, 3), dtype=np.float32)

    def preprocess(self, img: np.ndarray) -> np.ndarray:
        """Resize ``img`` to 224x224 and scale it to float32.

        No color-channel reordering is performed.

        Returns:
            np.ndarray: float32 array computed as ``pixel / 127.0 - 1.0``.
        """
        resized = cv2.resize(img, (224, 224))
        array = np.asarray(resized, dtype=np.float32)
        # NOTE(review): with a divisor of 127.0 a pixel value of 255 maps to
        # slightly above 1.0 — confirm this matches the normalization the
        # model was trained with.
        return (array / 127.0) - 1.0

    def predict(self, img: np.ndarray) -> tuple[list[float], int, str]:
        """Classify a single image.

        Returns:
            tuple: ``(probs, index, label)`` where ``probs`` is the model
            output for the image as a list, ``index`` is the position of the
            highest value, and ``label`` is the matching label or
            ``"Class <index>"`` when there is no label for that index.
        """
        self.data[0] = self.preprocess(img)
        predictions = self.model.predict(self.data, verbose=0)
        scores = predictions[0]  # the batch holds exactly one image
        probs = scores.tolist()
        index = int(np.argmax(scores))
        label = self.labels[index] if index < len(self.labels) else f"Class {index}"
        return probs, index, label

    def getPrediction(
        self,
        img: np.ndarray,
        draw: bool = True,
        pos: tuple[int, int] = (30, 50),
        scale: float = 1.5,
        color: tuple[int, int, int] = (0, 255, 0),
        thickness: int = 2,
    ) -> tuple[list[float], int]:
        """Classify ``img`` and optionally draw the label onto it in place.

        Args:
            img: Image to classify; modified in place when ``draw`` is True.
            draw: Whether to render the predicted label on ``img``.
            pos: Text position passed to ``cv2.putText``.
            scale: Font scale.
            color: Text color.
            thickness: Text thickness.

        Returns:
            tuple: ``(probs, index)`` as produced by ``predict()``.
        """
        probs, index, label = self.predict(img)
        if draw:
            cv2.putText(
                img, label, pos, cv2.FONT_HERSHEY_COMPLEX, scale, color, thickness
            )
        return probs, index

    def get_label(self, index: int) -> str:
        """Return the label at ``index``, or ``"Unknown (<index>)"`` if out of range."""
        if 0 <= index < len(self.labels):
            return self.labels[index]
        return f"Unknown ({index})"

    # ─────────────────────────── NEW METHODS ───────────────────────────
    def get_confidence(self, probs: list, index: int) -> float:
        """Return the confidence percentage for a specific class index.

        Args:
          probs: Probability list from predict().
          index: Class index to query.

        Returns:
          float: ``probs[index] * 100``, or 0.0 when ``probs`` is empty or
          ``index`` is outside ``0 .. len(probs) - 1``.
        """
        # Negative indices are rejected too (consistent with get_label)
        # instead of silently wrapping around to the end of the list.
        if not probs or not 0 <= index < len(probs):
            return 0.0
        return probs[index] * 100.0

    def predict_top_n(self, img: np.ndarray, n: int = 3) -> list[dict]:
        """Return the top-N predictions sorted by descending confidence.

        Args:
          img: BGR numpy array.
          n: Number of top predictions to return; values below 1 yield an
            empty list.

        Returns:
          List[dict]: [{'label': str, 'index': int, 'confidence': float}, ...]
        """
        probs, _, _ = self.predict(img)
        ranked = sorted(range(len(probs)), key=lambda i: probs[i], reverse=True)
        # Clamp so a negative n cannot slice from the end of the ranking.
        top = ranked[: max(n, 0)]
        return [
            {"label": self.get_label(i), "index": i, "confidence": probs[i] * 100.0}
            for i in top
        ]

    def get_all_predictions(self, probs: list) -> list[dict]:
        """Return all class predictions paired with their labels.

        Args:
          probs: Probability list from predict().

        Returns:
          List[dict]: [{'label': str, 'index': int, 'confidence': float}] sorted desc.
        """
        entries = [
            {"label": self.get_label(i), "index": i, "confidence": p * 100.0}
            for i, p in enumerate(probs)
        ]
        entries.sort(key=lambda entry: entry["confidence"], reverse=True)
        return entries

    def is_confident(self, probs: list, threshold: float = 70.0) -> bool:
        """Return True if the top prediction confidence meets the threshold.

        Args:
          probs: Probability list from predict().
          threshold: Minimum confidence percentage (default 70 %).

        Returns:
          bool: False when ``probs`` is empty.
        """
        if not probs:
            return False
        return max(probs) * 100.0 >= threshold

    def predict_batch(self, images: list) -> list[dict]:
        """Run predict() on a list of images and return all results.

        Args:
          images: List of BGR numpy arrays.

        Returns:
          List[dict]: [{'label': str, 'index': int, 'confidence': float}],
          one entry per image, in input order.
        """
        return [
            {"label": label, "index": index, "confidence": probs[index] * 100.0}
            for probs, index, label in map(self.predict, images)
        ]
# ====================== Quick Test ======================
def _run_webcam_demo():
    """Classify frames from the default camera until 'q' is pressed.

    Loads the model and labels from the hard-coded paths below, draws the
    predicted label on each frame, prints label and confidence, and shows
    the frame in a window.

    Raises:
        SystemExit: With exit status 1 if the camera cannot be opened.
    """
    model_path = "hand-gesture/hand-sign-detection/model/keras_model.h5"
    labels_path = "hand-gesture/hand-sign-detection/model/labels.txt"
    classifier = Classifier(model_path, labels_path)
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("Camera not opened. Try changing to cv2.VideoCapture(1)")
        # SystemExit instead of the site-provided exit(), which is not
        # guaranteed to exist; non-zero status marks the failure.
        raise SystemExit(1)
    print("Press 'q' to quit")
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            probs, idx = classifier.getPrediction(
                frame, draw=True, scale=1.7, color=(0, 255, 100)
            )
            conf = probs[idx] * 100
            print(f"→ {classifier.get_label(idx)} | Confidence: {conf:.1f}%")
            cv2.imshow("Classifier", frame)
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        # Release the camera and close windows even if prediction fails.
        cap.release()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    _run_webcam_demo()