camera-llm 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
camera_llm/__init__.py ADDED
@@ -0,0 +1 @@
1
+ # Camera LLM Inference App
@@ -0,0 +1,82 @@
1
+ """
2
+ CameraThread — captures frames from OpenCV VideoCapture in a background QThread
3
+ and emits frame_ready(np.ndarray) signals to the GUI at ~30 fps.
4
+ """
5
+ from __future__ import annotations
6
+
7
+ import cv2
8
+ import numpy as np
9
+ from PySide6.QtCore import QThread, Signal
10
+
11
+
12
class CameraThread(QThread):
    """Background thread that continuously reads frames from a camera device.

    Frames are read with OpenCV, converted from BGR to RGB and published
    through the ``frame_ready`` signal. Failures to open the source or to
    read a frame are reported through the ``error`` signal as a message
    string; the thread then stops on its own.
    """

    frame_ready = Signal(np.ndarray)
    error = Signal(str)

    def __init__(self, camera_source: int | str = 0, parent=None):
        """Remember the source to open; nothing is opened until ``run``."""
        super().__init__(parent)
        self.camera_source = camera_source
        self._running = False
        self._cap: cv2.VideoCapture | None = None

    # ── Public API ──────────────────────────────────────────────────────────

    def start_capture(self, camera_source: int | str | None = None) -> None:
        """Start the capture thread, optionally switching to a new source."""
        if camera_source is not None:
            self.camera_source = camera_source
        self._running = True
        self.start()

    def stop_capture(self) -> None:
        """Ask the capture loop to exit and wait for the thread to finish."""
        self._running = False
        self.wait(2000)  # give thread up to 2 s to finish

    # ── Helpers ─────────────────────────────────────────────────────────────

    @staticmethod
    def _resolve_stream_url(source: int | str) -> str:
        """Return the URL to hand to OpenCV for a non-integer camera source.

        If the user enters a bare IP Webcam URL like "http://10.0.0.249:8080",
        OpenCV needs the actual video stream endpoint, which is usually
        "/video". Any other URL is returned stripped but otherwise unchanged.
        """
        url = str(source).strip()
        if url.startswith("http"):
            slash_count = url.count("/")
            if slash_count == 2:
                # Only the two slashes of "http://": no path component at all.
                return url + "/video"
            if slash_count == 3 and url.endswith("/"):
                # Bare host followed by a single trailing slash.
                return url + "video"
        return url

    def _open_capture(self) -> cv2.VideoCapture:
        """Create the VideoCapture for ``self.camera_source`` (may be unopened)."""
        if isinstance(self.camera_source, int):
            cap = cv2.VideoCapture(self.camera_source, cv2.CAP_DSHOW)
            if not cap.isOpened():
                # Release the failed handle before retrying without a
                # backend hint (Linux / macOS).
                cap.release()
                cap = cv2.VideoCapture(self.camera_source)
            return cap
        # IP camera URL
        return cv2.VideoCapture(self._resolve_stream_url(self.camera_source))

    # ── QThread lifecycle ────────────────────────────────────────────────────

    def run(self) -> None:
        """Open the source and emit RGB frames until stopped or a read fails."""
        cap = self._cap = self._open_capture()
        try:
            if not cap.isOpened():
                self.error.emit(f"Cannot open camera source: {self.camera_source}")
                return

            # Prefer 720p for a good quality / performance balance
            cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
            cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
            cap.set(cv2.CAP_PROP_FPS, 30)

            while self._running:
                ret, frame = cap.read()
                if not ret:
                    self.error.emit("Failed to read frame from camera")
                    break
                # Convert BGR → RGB for Qt display
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                self.frame_ready.emit(frame_rgb)
                # ~30 fps → sleep ~33 ms
                self.msleep(33)
        finally:
            # Runs on every exit path (open failure, read failure, normal
            # stop) so the device is always released and the running flag
            # never stays set after the thread has ended.
            self._running = False
            cap.release()
            self._cap = None

    def __del__(self):
        """Best-effort cleanup if the object is collected with a live capture."""
        self._running = False
        # getattr: __init__ may not have completed when __del__ is invoked.
        cap = getattr(self, "_cap", None)
        if cap is not None:
            cap.release()
@@ -0,0 +1,81 @@
1
+ """
2
+ ChatSession — dataclass for a single chat session (image or video + messages).
3
+ """
4
+ from __future__ import annotations
5
+
6
+ import json
7
+ from dataclasses import dataclass, field
8
+ from datetime import datetime
9
+ from pathlib import Path
10
+ from typing import Literal
11
+
12
+
13
@dataclass
class ChatSession:
    """A single chat session: one captured image or video plus its messages.

    ``saved_at`` is not part of the serialised form; it is filled in by the
    loader and is therefore omitted from ``to_dict``.
    """

    name: str
    media_type: Literal["image", "video"]
    # For image: single data-URL string.
    # For video: list of frame data-URL strings.
    media_data: str | list[str]
    model: str
    messages: list[dict] = field(default_factory=list)
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
    saved_at: str = ""  # populated at load time from file mtime

    # ── Serialisation ────────────────────────────────────────────────────────

    def to_dict(self) -> dict:
        """Return the persisted fields as a plain dict (``saved_at`` excluded)."""
        return {
            "name": self.name,
            "media_type": self.media_type,
            "media_data": self.media_data,
            "model": self.model,
            "messages": self.messages,
            "timestamp": self.timestamp,
        }

    def to_json(self) -> str:
        """Serialise the session to an indented, non-ASCII-escaped JSON string."""
        return json.dumps(self.to_dict(), ensure_ascii=False, indent=2)

    @classmethod
    def from_dict(cls, data: dict) -> "ChatSession":
        """Build a session from a dict produced by ``to_dict``.

        Raises:
            KeyError: if ``name``, ``media_type``, ``media_data`` or ``model``
                is missing.
        """
        return cls(
            name=data["name"],
            media_type=data["media_type"],
            media_data=data["media_data"],
            model=data["model"],
            # Copy so later appends to the session do not mutate the caller's
            # dict; a missing or null value becomes an empty list.
            messages=list(data.get("messages") or []),
            # A missing or null timestamp is stored as "" (not "now"), so an
            # undated file is not shown with a made-up creation time.
            timestamp=data.get("timestamp") or "",
        )

    @classmethod
    def from_json(cls, json_str: str) -> "ChatSession":
        """Parse ``json_str`` and build a session from the resulting dict."""
        return cls.from_dict(json.loads(json_str))

    # ── Helpers ──────────────────────────────────────────────────────────────

    @staticmethod
    def _format_iso(value) -> str | None:
        """Format an ISO-8601 string for display, or return None if unparsable."""
        try:
            return datetime.fromisoformat(value).strftime("%b %d, %Y %H:%M")
        except (TypeError, ValueError):
            # TypeError covers non-string values such as None.
            return None

    @property
    def display_timestamp(self) -> str:
        """Creation time formatted for display; the raw value if unparsable."""
        formatted = self._format_iso(self.timestamp)
        if formatted is not None:
            return formatted
        return str(self.timestamp or "")

    @property
    def display_saved_at(self) -> str:
        """Display the file modification time, falling back to creation timestamp."""
        if self.saved_at:
            formatted = self._format_iso(self.saved_at)
            if formatted is not None:
                return formatted
        return self.display_timestamp

    def get_thumbnail_data_url(self) -> str:
        """Return the first (or only) frame data-URL for thumbnail display.

        The decision is made on the actual type of ``media_data`` rather than
        on ``media_type``, so a mismatched pair still yields a string: a plain
        string is returned as-is, a list yields its first element, and an
        empty or missing value yields "".
        """
        media = self.media_data
        if isinstance(media, str):
            return media
        return media[0] if media else ""
@@ -0,0 +1,56 @@
1
+ """
2
+ ChatStore — persists and loads ChatSession objects as JSON files
3
+ in the `chats/` directory next to the project root.
4
+ """
5
+ from __future__ import annotations
6
+
7
+ import re
8
+ from pathlib import Path
9
+
10
+ from camera_llm.chat_session import ChatSession
11
+
12
+ # Directory for saved chats: a "chats" folder beside the package directory
+ # (i.e. inside the parent of the folder that contains this module).
+ # NOTE(review): for an installed wheel this presumably resolves inside
+ # site-packages — confirm that location is writable for end users.
+ CHATS_DIR = Path(__file__).parent.parent / "chats"
13
+
14
+
15
def _ensure_dir() -> None:
    """Create the chats directory, with any missing parents, if it is absent."""
    CHATS_DIR.mkdir(exist_ok=True, parents=True)
17
+
18
+
19
+ def _safe_filename(name: str) -> str:
20
+ """Strip characters that are not safe for filenames."""
21
+ safe = re.sub(r'[\\/:*?"<>|]', "_", name).strip()
22
+ return safe or "untitled"
23
+
24
+
25
def save(session: ChatSession) -> Path:
    """Persist ``session`` as UTF-8 JSON in the chats directory.

    The file is named after the sanitised session name; an existing file
    with the same name is overwritten. Returns the path that was written.
    """
    _ensure_dir()
    target = CHATS_DIR / f"{_safe_filename(session.name)}.json"
    target.write_text(session.to_json(), encoding="utf-8")
    return target
32
+
33
+
34
def load_all() -> list[ChatSession]:
    """Load every saved ChatSession from the chats/ directory.

    Sessions are returned newest first, ordered by file modification time,
    and each one gets ``saved_at`` set to that time in ISO format. Files
    that disappear during the scan, cannot be read, or do not contain a
    valid session are skipped (and logged) instead of aborting the load.
    """
    # Function-scope imports: this module does not import these at top level.
    import logging
    from datetime import datetime

    _ensure_dir()

    # Stat each file exactly once, so ordering and saved_at use the same
    # value and a file deleted mid-scan cannot crash the sort.
    stamped: list[tuple[float, Path]] = []
    for fpath in CHATS_DIR.glob("*.json"):
        try:
            stamped.append((fpath.stat().st_mtime, fpath))
        except OSError:
            continue
    stamped.sort(key=lambda entry: entry[0], reverse=True)

    sessions: list[ChatSession] = []
    for mtime, fpath in stamped:
        try:
            session = ChatSession.from_json(fpath.read_text(encoding="utf-8"))
            session.saved_at = datetime.fromtimestamp(mtime).isoformat()
        except Exception:
            # Deliberately broad: one malformed file must not hide the rest.
            logging.getLogger(__name__).warning(
                "Skipping unreadable chat file %s", fpath, exc_info=True
            )
            continue
        sessions.append(session)
    return sessions
47
+
48
+
49
def delete(name: str) -> bool:
    """Delete a saved chat by its name.

    Returns True if a file was removed and False if no file existed for
    that name. The removal is attempted directly rather than checked first,
    so a file deleted concurrently yields False instead of an exception.
    """
    path = CHATS_DIR / (_safe_filename(name) + ".json")
    try:
        path.unlink()
    except FileNotFoundError:
        return False
    return True
camera_llm/cli.py ADDED
@@ -0,0 +1,72 @@
1
+ """
2
+ Camera → LLM Inference App
3
+ Entry point — creates the QApplication, applies the dark theme, and shows the MainWindow.
4
+
5
+ Usage:
6
+ camera-llm run          (or: python -m camera_llm.cli)
7
+ """
8
+ import sys
9
+ import os
10
+
11
+ # Ensure the project root is on the path
12
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
13
+
14
+ from PySide6.QtWidgets import QApplication
15
+ from PySide6.QtGui import QFont, QIcon
16
+ from PySide6.QtCore import Qt
17
+
18
+ from camera_llm.main_window import MainWindow
19
+
20
+ # def get_resource_path(relative_path):
21
+ # """Get path to resource, works for both dev and PyInstaller bundle."""
22
+ # if hasattr(sys, '_MEIPASS'):
23
+ # # PyInstaller extracts files to a temp folder (_MEIPASS) at runtime
24
+ # return os.path.join(sys._MEIPASS, relative_path)
25
+ # return os.path.join(os.path.dirname(os.path.abspath(__file__)), relative_path)
26
+
27
+
28
def run_app():
    """Create the QApplication, style it, show the main window and run it.

    Does not return: the process exits with the event loop's exit code.
    """
    # High-DPI support
    rounding = Qt.HighDpiScaleFactorRoundingPolicy.PassThrough
    QApplication.setHighDpiScaleFactorRoundingPolicy(rounding)

    app = QApplication(sys.argv)
    app.setApplicationName("Camera LLM Inference")
    app.setOrganizationName("CameraLLM")

    # Default font for the whole application
    default_font = QFont("Segoe UI", 10)
    default_font.setHintingPreference(QFont.HintingPreference.PreferNoHinting)
    app.setFont(default_font)

    # App-wide icon (taskbar, window title bar, etc.), only if the file exists
    package_dir = os.path.dirname(os.path.abspath(__file__))
    icon_path = os.path.join(package_dir, "icon.ico")
    if os.path.exists(icon_path):
        app.setWindowIcon(QIcon(icon_path))

    # Optional dark base theme; any failure here is ignored on purpose.
    try:
        import qdarktheme
        app.setStyleSheet(qdarktheme.load_stylesheet("dark"))
    except Exception:
        pass  # Our MAIN_STYLESHEET in styles.py covers everything standalone

    window = MainWindow()
    window.show()

    exit_code = app.exec()
    sys.exit(exit_code)
61
+
62
def cli():
    """Console entry point: ``camera-llm run`` starts the GUI.

    Any other invocation prints the usage line and exits with status 1.
    """
    argv = sys.argv
    if len(argv) < 2 or argv[1] != "run":
        print("Usage: camera-llm run")
        sys.exit(1)
    else:
        # Remove 'run' so QApplication doesn't try to parse it
        sys.argv = [argv[0]] + argv[2:]
        run_app()
70
+
71
+ # Running this module directly starts the GUI straight away through run_app();
+ # it does not go through cli(), so no "run" argument is required here.
+ if __name__ == "__main__":
72
+ run_app()
camera_llm/icon.ico ADDED
Binary file
@@ -0,0 +1,165 @@
1
+ """
2
+ LMStudioClient — wraps the openai Python client pointed at a local LM Studio server.
3
+ All image/video encoding is done fully in-memory (no files written to disk).
4
+ """
5
+ from __future__ import annotations
6
+
7
+ import base64
8
+ from typing import Generator
9
+
10
+ import cv2
11
+ import numpy as np
12
+ from openai import OpenAI, OpenAIError
13
+
14
+
15
class LMStudioClient:
    """Interface to LM Studio's OpenAI-compatible local API."""

    def __init__(self, base_url: str = "http://localhost:1234/v1"):
        """Create a client bound to ``base_url``."""
        self.base_url = base_url
        self._client = self._make_client(base_url)

    # ── Connection ───────────────────────────────────────────────────────────

    def set_base_url(self, base_url: str) -> None:
        """Point the client at a different server by rebuilding it."""
        self.base_url = base_url
        self._client = self._make_client(base_url)

    def _make_client(self, base_url: str) -> OpenAI:
        """Build the underlying OpenAI client for ``base_url``."""
        # NOTE(review): "lm-studio" looks like a placeholder key for the
        # local server — confirm it is not expected to be a real secret.
        return OpenAI(base_url=base_url, api_key="lm-studio")

    # ── Model discovery ──────────────────────────────────────────────────────

    def list_models(self) -> list[str]:
        """Return model IDs available on the local server.

        Raises:
            ConnectionError: if the request fails with an OpenAIError.
        """
        try:
            models = self._client.models.list()
            return [m.id for m in models.data]
        except OpenAIError as exc:
            raise ConnectionError(
                f"Cannot reach LM Studio at {self.base_url}.\n"
                f"Make sure LM Studio is running and the server is started.\n\nDetail: {exc}"
            ) from exc

    # ── Encoding helpers (fully in-memory) ───────────────────────────────────

    @staticmethod
    def encode_frame(frame: np.ndarray, quality: int = 85) -> str:
        """
        Encode a numpy RGB frame to a base64 JPEG data-URL string.
        No file is written to disk.

        Raises:
            RuntimeError: if JPEG encoding fails.
        """
        # Convert RGB → BGR for cv2 (cv2 works in BGR)
        bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        encode_params = [cv2.IMWRITE_JPEG_QUALITY, quality]
        success, buffer = cv2.imencode(".jpg", bgr, encode_params)
        if not success:
            raise RuntimeError("cv2.imencode failed")
        b64 = base64.b64encode(buffer.tobytes()).decode("utf-8")
        return f"data:image/jpeg;base64,{b64}"

    @staticmethod
    def sample_video_frames(
        frames: list[np.ndarray], max_frames: int = 8
    ) -> list[np.ndarray]:
        """
        Down-sample a list of video frames to at most max_frames evenly spaced frames.

        Returns an empty list when there are no frames or when max_frames is
        not positive. When the input already fits, the same list object is
        returned unchanged.
        """
        if not frames or max_frames <= 0:
            # max_frames == 0 would otherwise divide by zero below.
            return []
        if len(frames) <= max_frames:
            return frames
        step = len(frames) / max_frames
        return [frames[int(i * step)] for i in range(max_frames)]

    @staticmethod
    def encode_video_frames(
        frames: list[np.ndarray], max_frames: int = 8
    ) -> list[str]:
        """Return a list of base64 data-URL strings for sampled video frames."""
        sampled = LMStudioClient.sample_video_frames(frames, max_frames)
        return [LMStudioClient.encode_frame(f) for f in sampled]

    @staticmethod
    def encode_video_as_grid(
        frames: list[np.ndarray], max_frames: int = 8, cols: int = 4
    ) -> str:
        """Stitch sampled frames into a single image grid (robust for local VLMs).

        Each sampled frame is resized to 320x180 and laid out row by row,
        ``cols`` frames per row; the last row is padded with black frames.
        Returns "" when there is nothing to encode.

        Raises:
            ValueError: if ``cols`` is less than 1.
        """
        if cols < 1:
            # Previously surfaced as an obscure ZeroDivisionError or a
            # failure inside numpy when stacking an empty list of rows.
            raise ValueError("cols must be at least 1")

        sampled = LMStudioClient.sample_video_frames(frames, max_frames)
        if not sampled:
            return ""

        # Resize to keep the grid reasonable
        target_w, target_h = 320, 180
        resized = [cv2.resize(f, (target_w, target_h)) for f in sampled]

        # Pad with black frames if needed
        while len(resized) % cols != 0:
            resized.append(np.zeros((target_h, target_w, 3), dtype=np.uint8))

        rows = [
            np.hstack(resized[start:start + cols])
            for start in range(0, len(resized), cols)
        ]
        grid = np.vstack(rows)
        return LMStudioClient.encode_frame(grid)

    # ── Chat ─────────────────────────────────────────────────────────────────

    def chat(
        self,
        messages: list[dict],
        model: str,
        stream: bool = True,
    ) -> Generator[str, None, None]:
        """
        Send a messages list to the model and yield token chunks.
        Supports streaming for responsive chat UI.

        This is a generator, so the request is only sent once iteration
        starts. With ``stream=False`` exactly one string is yielded.

        Raises:
            RuntimeError: if the request or the stream fails with an
                OpenAIError.
        """
        try:
            response = self._client.chat.completions.create(
                model=model,
                messages=messages,
                stream=stream,
                temperature=0.7,
                max_tokens=2048,
            )
            if stream:
                for chunk in response:
                    # A chunk may arrive with no choices at all; indexing
                    # [0] on it would raise IndexError mid-stream.
                    if not chunk.choices:
                        continue
                    delta = chunk.choices[0].delta
                    if delta and delta.content:
                        yield delta.content
            else:
                choices = response.choices
                yield (choices[0].message.content or "") if choices else ""
        except OpenAIError as exc:
            raise RuntimeError(f"LLM request failed: {exc}") from exc

    # ── Build initial vision message ─────────────────────────────────────────

    @staticmethod
    def build_image_message(data_url: str, user_text: str) -> dict:
        """Construct the first user message containing a still image."""
        return {
            "role": "user",
            "content": [
                {
                    "type": "image_url",
                    "image_url": {"url": data_url},
                },
                {"type": "text", "text": user_text},
            ],
        }

    @staticmethod
    def build_video_message(data_urls: list[str], user_text: str) -> dict:
        """Construct the first user message with multiple video frames.

        The frames come first, in order, followed by the text part.
        """
        content: list[dict] = [
            {"type": "image_url", "image_url": {"url": url}}
            for url in data_urls
        ]
        content.append({"type": "text", "text": user_text})
        return {"role": "user", "content": content}
@@ -0,0 +1,102 @@
1
+ """
2
+ MainWindow — hosts a QStackedWidget for all 7 screens plus the slide-in side panel.
3
+
4
+ Navigation contract:
5
+ Each screen receives a reference to MainWindow and calls
6
+ self.main_window.navigate_to(SCREEN_ID, **kwargs) to move forward,
7
+ or self.main_window.go_home() to return to Screen 1.
8
+
9
+ Screen indices:
10
+ 0 Screen1_Home
11
+ 1 Screen2_Capture
12
+ 2 Screen3_Crop
13
+ 3 Screen4_ModelSelect
14
+ 4 Screen5_Chat
15
+ 5 Screen6_Save
16
+ 6 Screen7_Done
17
+ """
18
+ from __future__ import annotations
19
+
20
+ from PySide6.QtCore import Qt, QPropertyAnimation, QEasingCurve
21
+ from PySide6.QtWidgets import (
22
+ QMainWindow, QWidget, QHBoxLayout, QStackedWidget, QSizePolicy
23
+ )
24
+
25
+ from camera_llm.styles import MAIN_STYLESHEET
26
+
27
+
28
class MainWindow(QMainWindow):
    """Top-level window: a stacked widget with one page per screen.

    Screens are given a reference to this window and move between pages
    through ``navigate_to`` and ``go_home``. State that several screens
    need (captured media, chosen model, server URL) lives on this object.
    """

    # ── Screen indices ───────────────────────────────────────────────────────
    HOME = 0
    CAPTURE = 1
    CROP = 2
    MODEL_SELECT = 3
    CHAT = 4
    SAVE = 5
    DONE = 6

    def __init__(self):
        """Set up the window, the shared state and all seven screens."""
        super().__init__()
        self.setWindowTitle("Camera → LLM Inference")
        self.resize(1100, 780)
        self.setMinimumSize(900, 640)
        self.setStyleSheet(MAIN_STYLESHEET)

        # ── Shared state (passed between screens via navigate_to) ────────────
        self.capture_mode: str = "image"   # "image" | "video"
        self.captured_frame = None         # np.ndarray (image still)
        self.captured_frames: list = []    # list[np.ndarray] (video)
        self.media_data_url = None         # str (image data-URL)
        self.media_data_urls: list = []    # list[str] (video data-URLs)
        self.stitched_data_url: str = ""   # str (video grid data-URL)
        self.selected_model: str = ""
        self.lm_base_url: str = "http://localhost:1234/v1"

        # ── Build UI ─────────────────────────────────────────────────────────
        self._root = QWidget()
        self._layout = QHBoxLayout(self._root)
        self._layout.setContentsMargins(0, 0, 0, 0)
        self._layout.setSpacing(0)
        self.setCentralWidget(self._root)

        # Screens are imported here, not at module level, to avoid a
        # circular dependency.
        from camera_llm.screens.screen1_home import Screen1_Home
        from camera_llm.screens.screen2_capture import Screen2_Capture
        from camera_llm.screens.screen3_crop import Screen3_Crop
        from camera_llm.screens.screen4_model_select import Screen4_ModelSelect
        from camera_llm.screens.screen5_chat import Screen5_Chat
        from camera_llm.screens.screen6_save import Screen6_Save
        from camera_llm.screens.screen7_done import Screen7_Done

        # Construct every screen first, in index order, before the stack exists.
        self.screen1 = Screen1_Home(self)
        self.screen2 = Screen2_Capture(self)
        self.screen3 = Screen3_Crop(self)
        self.screen4 = Screen4_ModelSelect(self)
        self.screen5 = Screen5_Chat(self)
        self.screen6 = Screen6_Save(self)
        self.screen7 = Screen7_Done(self)

        # Insertion order defines the page index, so it must match the
        # HOME..DONE constants above.
        pages = (
            self.screen1,
            self.screen2,
            self.screen3,
            self.screen4,
            self.screen5,
            self.screen6,
            self.screen7,
        )
        self._stack = QStackedWidget()
        for page in pages:
            self._stack.addWidget(page)

        self._layout.addWidget(self._stack)
        self.navigate_to(self.HOME)

    # ── Navigation ───────────────────────────────────────────────────────────

    def navigate_to(self, screen_index: int, **kwargs) -> None:
        """Show the page at ``screen_index`` and run its ``on_enter`` hook.

        The hook, when the page defines one, receives ``kwargs`` unchanged.
        """
        stack = self._stack
        stack.setCurrentIndex(screen_index)
        active = stack.currentWidget()
        if not hasattr(active, "on_enter"):
            return
        active.on_enter(**kwargs)

    def go_home(self) -> None:
        """Refresh the saved-chats side panel, then show Screen 1.

        Unlike ``navigate_to``, this does not call the home screen's
        ``on_enter`` hook.
        """
        self.screen1.refresh_side_panel()
        self._stack.setCurrentIndex(self.HOME)
@@ -0,0 +1 @@
1
+ # Screens package