PyPI - camera-llm - Versions diffs - 0.1.1__py3-none-any.whl - Mend

camera-llm 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

camera_llm/__init__.py +1 -0
camera_llm/camera_thread.py +82 -0
camera_llm/chat_session.py +81 -0
camera_llm/chat_store.py +56 -0
camera_llm/cli.py +72 -0
camera_llm/icon.ico +0 -0
camera_llm/llm_client.py +165 -0
camera_llm/main_window.py +102 -0
camera_llm/screens/__init__.py +1 -0
camera_llm/screens/screen1_home.py +242 -0
camera_llm/screens/screen2_capture.py +305 -0
camera_llm/screens/screen3_crop.py +263 -0
camera_llm/screens/screen4_model_select.py +184 -0
camera_llm/screens/screen5_chat.py +514 -0
camera_llm/screens/screen6_save.py +127 -0
camera_llm/screens/screen7_done.py +88 -0
camera_llm/styles.py +319 -0
camera_llm-0.1.1.dist-info/METADATA +145 -0
camera_llm-0.1.1.dist-info/RECORD +22 -0
camera_llm-0.1.1.dist-info/WHEEL +5 -0
camera_llm-0.1.1.dist-info/entry_points.txt +2 -0
camera_llm-0.1.1.dist-info/top_level.txt +1 -0

camera_llm/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ # Camera LLM Inference App

camera_llm/camera_thread.py ADDED Viewed

@@ -0,0 +1,82 @@
+"""
+CameraThread — captures frames from OpenCV VideoCapture in a background QThread
+and emits frame_ready(np.ndarray) signals to the GUI at ~30 fps.
+"""
+from __future__ import annotations
+import cv2
+import numpy as np
+from PySide6.QtCore import QThread, Signal
+class CameraThread(QThread):
+    """Background thread that continuously reads from a camera device."""
+    frame_ready = Signal(np.ndarray)
+    error       = Signal(str)
+    def __init__(self, camera_source: int | str = 0, parent=None):
+        super().__init__(parent)
+        self.camera_source = camera_source
+        self._running = False
+        self._cap: cv2.VideoCapture | None = None
+    # ── Public API ──────────────────────────────────────────────────────────
+    def start_capture(self, camera_source: int | str | None = None) -> None:
+        if camera_source is not None:
+            self.camera_source = camera_source
+        self._running = True
+        self.start()
+    def stop_capture(self) -> None:
+        self._running = False
+        self.wait(2000)  # give thread up to 2 s to finish
+    # ── QThread lifecycle ────────────────────────────────────────────────────
+    def run(self) -> None:
+        if isinstance(self.camera_source, int):
+            self._cap = cv2.VideoCapture(self.camera_source, cv2.CAP_DSHOW)
+            if not self._cap.isOpened():
+                # Try without backend hint (Linux / macOS)
+                self._cap = cv2.VideoCapture(self.camera_source)
+        else:
+            # IP camera URL
+            url = str(self.camera_source).strip()
+            # If the user enters a bare IP Webcam URL like "http://10.0.0.249:8080",
+            # OpenCV needs the actual video stream endpoint, which is usually "/video".
+            if url.startswith("http") and url.count("/") == 2:
+                url += "/video"
+            elif url.startswith("http") and url.count("/") == 3 and url.endswith("/"):
+                url += "video"
+            self._cap = cv2.VideoCapture(url)
+        if not self._cap.isOpened():
+            self.error.emit(f"Cannot open camera source: {self.camera_source}")
+            return
+        # Prefer 720p for a good quality / performance balance
+        self._cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
+        self._cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
+        self._cap.set(cv2.CAP_PROP_FPS, 30)
+        while self._running:
+            ret, frame = self._cap.read()
+            if not ret:
+                self.error.emit("Failed to read frame from camera")
+                break
+            # Convert BGR → RGB for Qt display
+            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+            self.frame_ready.emit(frame_rgb)
+            # ~30 fps → sleep ~33 ms
+            self.msleep(33)
+        if self._cap:
+            self._cap.release()
+            self._cap = None
+    def __del__(self):
+        self._running = False
+        if self._cap:
+            self._cap.release()

camera_llm/chat_session.py ADDED Viewed

@@ -0,0 +1,81 @@
+"""
+ChatSession — dataclass for a single chat session (image or video + messages).
+"""
+from __future__ import annotations
+import json
+from dataclasses import dataclass, field
+from datetime import datetime
+from pathlib import Path
+from typing import Literal
+@dataclass
+class ChatSession:
+    name:       str
+    media_type: Literal["image", "video"]
+    # For image: single data-URL string.
+    # For video: list of frame data-URL strings.
+    media_data: str | list[str]
+    model:      str
+    messages:   list[dict] = field(default_factory=list)
+    timestamp:  str        = field(default_factory=lambda: datetime.now().isoformat())
+    saved_at:   str        = ""   # populated at load time from file mtime
+    # ── Serialisation ────────────────────────────────────────────────────────
+    def to_dict(self) -> dict:
+        return {
+            "name":       self.name,
+            "media_type": self.media_type,
+            "media_data": self.media_data,
+            "model":      self.model,
+            "messages":   self.messages,
+            "timestamp":  self.timestamp,
+        }
+    def to_json(self) -> str:
+        return json.dumps(self.to_dict(), ensure_ascii=False, indent=2)
+    @classmethod
+    def from_dict(cls, data: dict) -> "ChatSession":
+        return cls(
+            name       = data["name"],
+            media_type = data["media_type"],
+            media_data = data["media_data"],
+            model      = data["model"],
+            messages   = data.get("messages", []),
+            timestamp  = data.get("timestamp", ""),
+        )
+    @classmethod
+    def from_json(cls, json_str: str) -> "ChatSession":
+        return cls.from_dict(json.loads(json_str))
+    # ── Helpers ──────────────────────────────────────────────────────────────
+    @property
+    def display_timestamp(self) -> str:
+        try:
+            dt = datetime.fromisoformat(self.timestamp)
+            return dt.strftime("%b %d, %Y  %H:%M")
+        except ValueError:
+            return self.timestamp
+    @property
+    def display_saved_at(self) -> str:
+        """Display the file modification time, falling back to creation timestamp."""
+        if self.saved_at:
+            try:
+                dt = datetime.fromisoformat(self.saved_at)
+                return dt.strftime("%b %d, %Y  %H:%M")
+            except ValueError:
+                pass
+        return self.display_timestamp
+    def get_thumbnail_data_url(self) -> str:
+        """Return the first (or only) frame data-URL for thumbnail display."""
+        if self.media_type == "image":
+            return self.media_data  # type: ignore[return-value]
+        frames = self.media_data  # type: ignore[assignment]
+        return frames[0] if frames else ""

camera_llm/chat_store.py ADDED Viewed

@@ -0,0 +1,56 @@
+"""
+ChatStore — persists and loads ChatSession objects as JSON files
+in a `chats/` directory located beside the camera_llm package (one level above this module's folder).
+"""
+from __future__ import annotations
+import re
+from pathlib import Path
+from camera_llm.chat_session import ChatSession
+CHATS_DIR = Path(__file__).parent.parent / "chats"  # <directory containing the camera_llm package>/chats
+def _ensure_dir() -> None:
+    CHATS_DIR.mkdir(parents=True, exist_ok=True)
+def _safe_filename(name: str) -> str:
+    """Strip characters that are not safe for filenames."""
+    safe = re.sub(r'[\\/:*?"<>|]', "_", name).strip()
+    return safe or "untitled"
+def save(session: ChatSession) -> Path:
+    """Write a ChatSession to a JSON file. Returns the file path."""
+    _ensure_dir()
+    filename = _safe_filename(session.name) + ".json"
+    path     = CHATS_DIR / filename
+    path.write_text(session.to_json(), encoding="utf-8")
+    return path
+def load_all() -> list[ChatSession]:
+    """Load every saved ChatSession from the chats/ directory."""
+    _ensure_dir()
+    sessions: list[ChatSession] = []
+    for fpath in sorted(CHATS_DIR.glob("*.json"), key=lambda p: p.stat().st_mtime, reverse=True):
+        try:
+            session = ChatSession.from_json(fpath.read_text(encoding="utf-8"))
+            from datetime import datetime
+            session.saved_at = datetime.fromtimestamp(fpath.stat().st_mtime).isoformat()
+            sessions.append(session)
+        except Exception:
+            pass  # skip malformed files
+    return sessions
+def delete(name: str) -> bool:
+    """Delete a saved chat by its name. Returns True if deleted."""
+    filename = _safe_filename(name) + ".json"
+    path     = CHATS_DIR / filename
+    if path.exists():
+        path.unlink()
+        return True
+    return False

camera_llm/cli.py ADDED Viewed

@@ -0,0 +1,72 @@
+"""
+Camera → LLM Inference App
+Entry point — creates the QApplication, applies the dark theme, and shows the MainWindow.
+Usage:
+    camera-llm run
+"""
+import sys
+import os
+# Ensure this file's own directory (the camera_llm package folder) is first on sys.path
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+from PySide6.QtWidgets import QApplication
+from PySide6.QtGui     import QFont, QIcon
+from PySide6.QtCore    import Qt
+from camera_llm.main_window import MainWindow
+# def get_resource_path(relative_path):
+#     """Get path to resource, works for both dev and PyInstaller bundle."""
+#     if hasattr(sys, '_MEIPASS'):
+#         # PyInstaller extracts files to a temp folder (_MEIPASS) at runtime
+#         return os.path.join(sys._MEIPASS, relative_path)
+#     return os.path.join(os.path.dirname(os.path.abspath(__file__)), relative_path)
+def run_app():
+    # High-DPI support
+    QApplication.setHighDpiScaleFactorRoundingPolicy(
+        Qt.HighDpiScaleFactorRoundingPolicy.PassThrough
+    )
+    app = QApplication(sys.argv)
+    app.setApplicationName("Camera LLM Inference")
+    app.setOrganizationName("CameraLLM")
+    # Set default font
+    font = QFont("Segoe UI", 10)
+    font.setHintingPreference(QFont.HintingPreference.PreferNoHinting)
+    app.setFont(font)
+    # Set app-wide icon (affects taskbar, window title bar, etc.)
+    icon_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "icon.ico")
+    if os.path.exists(icon_path):
+        app.setWindowIcon(QIcon(icon_path))
+    # Apply dark theme via pyqtdarktheme as a base, then layer our custom stylesheet
+    try:
+        import qdarktheme
+        base_sheet = qdarktheme.load_stylesheet("dark")
+        app.setStyleSheet(base_sheet)
+    except (ImportError, Exception):
+        pass  # Our MAIN_STYLESHEET in styles.py covers everything standalone
+    window = MainWindow()
+    window.show()
+    sys.exit(app.exec())
+def cli():
+    if len(sys.argv) > 1 and sys.argv[1] == "run":
+        # Remove 'run' so QApplication doesn't try to parse it
+        sys.argv = [sys.argv[0]] + sys.argv[2:]
+        run_app()
+    else:
+        print("Usage: camera-llm run")
+        sys.exit(1)
+if __name__ == "__main__":
+    run_app()

camera_llm/icon.ico ADDED Viewed

Binary file

camera_llm/llm_client.py ADDED Viewed

@@ -0,0 +1,165 @@
+"""
+LMStudioClient — wraps the openai Python client pointed at a local LM Studio server.
+All image/video encoding is done fully in-memory (no files written to disk).
+"""
+from __future__ import annotations
+import base64
+from typing import Generator
+import cv2
+import numpy as np
+from openai import OpenAI, OpenAIError
+class LMStudioClient:
+    """Interface to LM Studio's OpenAI-compatible local API."""
+    def __init__(self, base_url: str = "http://localhost:1234/v1"):
+        self.base_url = base_url
+        self._client  = self._make_client(base_url)
+    # ── Connection ───────────────────────────────────────────────────────────
+    def set_base_url(self, base_url: str) -> None:
+        self.base_url = base_url
+        self._client  = self._make_client(base_url)
+    def _make_client(self, base_url: str) -> OpenAI:
+        return OpenAI(base_url=base_url, api_key="lm-studio")
+    # ── Model discovery ──────────────────────────────────────────────────────
+    def list_models(self) -> list[str]:
+        """Return model IDs available on the local server."""
+        try:
+            models = self._client.models.list()
+            return [m.id for m in models.data]
+        except OpenAIError as exc:
+            raise ConnectionError(
+                f"Cannot reach LM Studio at {self.base_url}.\n"
+                f"Make sure LM Studio is running and the server is started.\n\nDetail: {exc}"
+            ) from exc
+    # ── Encoding helpers (fully in-memory) ───────────────────────────────────
+    @staticmethod
+    def encode_frame(frame: np.ndarray, quality: int = 85) -> str:
+        """
+        Encode a numpy RGB frame to a base64 JPEG data-URL string.
+        No file is written to disk.
+        """
+        # Convert RGB → BGR for cv2 (cv2 works in BGR)
+        bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
+        encode_params = [cv2.IMWRITE_JPEG_QUALITY, quality]
+        success, buffer = cv2.imencode(".jpg", bgr, encode_params)
+        if not success:
+            raise RuntimeError("cv2.imencode failed")
+        b64 = base64.b64encode(buffer.tobytes()).decode("utf-8")
+        return f"data:image/jpeg;base64,{b64}"
+    @staticmethod
+    def sample_video_frames(
+        frames: list[np.ndarray], max_frames: int = 8
+    ) -> list[np.ndarray]:
+        """
+        Down-sample a list of video frames to at most max_frames evenly spaced frames.
+        """
+        if not frames:
+            return []
+        if len(frames) <= max_frames:
+            return frames
+        step  = len(frames) / max_frames
+        idxs  = [int(i * step) for i in range(max_frames)]
+        return [frames[i] for i in idxs]
+    @staticmethod
+    def encode_video_frames(
+        frames: list[np.ndarray], max_frames: int = 8
+    ) -> list[str]:
+        """Return a list of base64 data-URL strings for sampled video frames."""
+        sampled = LMStudioClient.sample_video_frames(frames, max_frames)
+        return [LMStudioClient.encode_frame(f) for f in sampled]
+    @staticmethod
+    def encode_video_as_grid(
+        frames: list[np.ndarray], max_frames: int = 8, cols: int = 4
+    ) -> str:
+        """Stitch sampled frames into a single image grid (robust for local VLMs)."""
+        sampled = LMStudioClient.sample_video_frames(frames, max_frames)
+        if not sampled:
+            return ""
+        # Resize to keep the grid reasonable
+        target_w, target_h = 320, 180
+        resized = [cv2.resize(f, (target_w, target_h)) for f in sampled]
+        # Pad with black frames if needed
+        while len(resized) % cols != 0:
+            resized.append(np.zeros((target_h, target_w, 3), dtype=np.uint8))
+        rows = []
+        for i in range(0, len(resized), cols):
+            row = np.hstack(resized[i:i+cols])
+            rows.append(row)
+        grid = np.vstack(rows)
+        return LMStudioClient.encode_frame(grid)
+    # ── Chat ─────────────────────────────────────────────────────────────────
+    def chat(
+        self,
+        messages: list[dict],
+        model: str,
+        stream: bool = True,
+    ) -> Generator[str, None, None]:
+        """
+        Send a messages list to the model and yield token chunks.
+        Supports streaming for responsive chat UI.
+        """
+        try:
+            response = self._client.chat.completions.create(
+                model=model,
+                messages=messages,
+                stream=stream,
+                temperature=0.7,
+                max_tokens=2048,
+            )
+            if stream:
+                for chunk in response:
+                    delta = chunk.choices[0].delta
+                    if delta and delta.content:
+                        yield delta.content
+            else:
+                yield response.choices[0].message.content or ""
+        except OpenAIError as exc:
+            raise RuntimeError(f"LLM request failed: {exc}") from exc
+    # ── Build initial vision message ─────────────────────────────────────────
+    @staticmethod
+    def build_image_message(data_url: str, user_text: str) -> dict:
+        """Construct the first user message containing a still image."""
+        return {
+            "role": "user",
+            "content": [
+                {
+                    "type": "image_url",
+                    "image_url": {"url": data_url},
+                },
+                {"type": "text", "text": user_text},
+            ],
+        }
+    @staticmethod
+    def build_video_message(data_urls: list[str], user_text: str) -> dict:
+        """Construct the first user message with multiple video frames."""
+        content = []
+        for i, url in enumerate(data_urls):
+            content.append({
+                "type": "image_url",
+                "image_url": {"url": url},
+            })
+        content.append({"type": "text", "text": user_text})
+        return {"role": "user", "content": content}

camera_llm/main_window.py ADDED Viewed

@@ -0,0 +1,102 @@
+"""
+MainWindow — hosts a QStackedWidget for all 7 screens plus the slide-in side panel.
+Navigation contract:
+    Each screen receives a reference to MainWindow and calls
+    self.main_window.navigate_to(SCREEN_ID, **kwargs) to move forward,
+    or self.main_window.go_home() to return to Screen 1.
+Screen indices:
+    0  Screen1_Home
+    1  Screen2_Capture
+    2  Screen3_Crop
+    3  Screen4_ModelSelect
+    4  Screen5_Chat
+    5  Screen6_Save
+    6  Screen7_Done
+"""
+from __future__ import annotations
+from PySide6.QtCore    import Qt, QPropertyAnimation, QEasingCurve
+from PySide6.QtWidgets import (
+    QMainWindow, QWidget, QHBoxLayout, QStackedWidget, QSizePolicy
+)
+from camera_llm.styles import MAIN_STYLESHEET
+class MainWindow(QMainWindow):
+    # ── Screen indices ───────────────────────────────────────────────────────
+    HOME           = 0
+    CAPTURE        = 1
+    CROP           = 2
+    MODEL_SELECT   = 3
+    CHAT           = 4
+    SAVE           = 5
+    DONE           = 6
+    def __init__(self):
+        super().__init__()
+        self.setWindowTitle("Camera → LLM Inference")
+        self.resize(1100, 780)
+        self.setMinimumSize(900, 640)
+        self.setStyleSheet(MAIN_STYLESHEET)
+        # ── Shared state (passed between screens via navigate_to) ────────────
+        self.capture_mode: str          = "image"   # "image" | "video"
+        self.captured_frame             = None       # np.ndarray  (image still)
+        self.captured_frames: list      = []         # list[np.ndarray] (video)
+        self.media_data_url             = None       # str  (image data-URL)
+        self.media_data_urls: list      = []         # list[str] (video data-URLs)
+        self.stitched_data_url: str     = ""         # str (video grid data-URL)
+        self.selected_model: str        = ""
+        self.lm_base_url: str           = "http://localhost:1234/v1"
+        # ── Build UI ─────────────────────────────────────────────────────────
+        self._root   = QWidget()
+        self._layout = QHBoxLayout(self._root)
+        self._layout.setContentsMargins(0, 0, 0, 0)
+        self._layout.setSpacing(0)
+        self.setCentralWidget(self._root)
+        # Lazy import screens to avoid circular dependency at module level
+        from camera_llm.screens.screen1_home         import Screen1_Home
+        from camera_llm.screens.screen2_capture      import Screen2_Capture
+        from camera_llm.screens.screen3_crop         import Screen3_Crop
+        from camera_llm.screens.screen4_model_select import Screen4_ModelSelect
+        from camera_llm.screens.screen5_chat         import Screen5_Chat
+        from camera_llm.screens.screen6_save         import Screen6_Save
+        from camera_llm.screens.screen7_done         import Screen7_Done
+        self.screen1 = Screen1_Home(self)
+        self.screen2 = Screen2_Capture(self)
+        self.screen3 = Screen3_Crop(self)
+        self.screen4 = Screen4_ModelSelect(self)
+        self.screen5 = Screen5_Chat(self)
+        self.screen6 = Screen6_Save(self)
+        self.screen7 = Screen7_Done(self)
+        self._stack = QStackedWidget()
+        for screen in [
+            self.screen1, self.screen2, self.screen3, self.screen4,
+            self.screen5, self.screen6, self.screen7,
+        ]:
+            self._stack.addWidget(screen)
+        self._layout.addWidget(self._stack)
+        self.navigate_to(self.HOME)
+    # ── Navigation ───────────────────────────────────────────────────────────
+    def navigate_to(self, screen_index: int, **kwargs) -> None:
+        """Switch to the given screen and call its on_enter(**kwargs) hook."""
+        self._stack.setCurrentIndex(screen_index)
+        screen = self._stack.currentWidget()
+        if hasattr(screen, "on_enter"):
+            screen.on_enter(**kwargs)
+    def go_home(self) -> None:
+        """Return to Screen 1 and refresh the saved-chats side panel."""
+        self.screen1.refresh_side_panel()
+        self._stack.setCurrentIndex(self.HOME)

camera_llm/screens/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ # Screens package