sightrag 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sightrag/__init__.py ADDED
@@ -0,0 +1,31 @@
1
+ """
2
+ SightRAG — Image and Video RAG
3
+ See. Search. Retrieve.
4
+
5
+ pip install sightrag
6
+
7
+ Usage:
8
+ from sightrag import SightRAG
9
+
10
+ rag = SightRAG()
11
+ rag.index("./photos/")
12
+ results = rag.query("find empty shelf")
13
+
14
+ REST API:
15
+ from sightrag import serve
16
+ serve(port=8000)
17
+ """
18
+
19
+ from .core import SightRAG
20
+
21
+ __version__ = "0.1.0"
22
+ __author__ = "Ant (VK-Ant)"
23
+
24
+
25
def serve(host: str = "0.0.0.0", port: int = 8000):
    """Launch the SightRAG REST API server.

    The API module is imported only when this function is called, so the
    package itself can be imported without pulling in the API module.

    Args:
        host: Interface address handed to the API server.
        port: TCP port handed to the API server.
    """
    from . import api as _api

    _api.serve(host=host, port=port)
29
+
30
+
31
+ __all__ = ["SightRAG", "serve"]
sightrag/api.py ADDED
@@ -0,0 +1,189 @@
1
+ # sightrag/api.py
2
+ # REST API — FastAPI based
3
+ # Run: sightrag-server or python -m sightrag.api
4
+
5
+ import os
6
+ import json
7
+ import shutil
8
+ import tempfile
9
+ from pathlib import Path
10
+
11
+ try:
12
+ from fastapi import FastAPI, UploadFile, File, Form, HTTPException
13
+ from fastapi.responses import JSONResponse
14
+ import uvicorn
15
+ except ImportError:
16
+ raise ImportError(
17
+ "FastAPI not installed.\n"
18
+ "Run: pip install sightrag[api]"
19
+ )
20
+
21
+ from .core import SightRAG
22
+
23
+ app = FastAPI(
24
+ title="SightRAG API",
25
+ description="See. Search. Retrieve. — Image and Video RAG",
26
+ version="0.1.0"
27
+ )
28
+
29
+ # Global SightRAG instance
30
+ rag = None
31
+
32
+
33
def get_rag():
    """Return the shared SightRAG instance, building it on first use.

    On the first call the instance is configured from the environment:
    SIGHTRAG_STORE (default "sqlite"), SIGHTRAG_DOMAIN (default None) and
    SIGHTRAG_INDEX (default "./sightrag_index"). Later calls return the
    same object.
    """
    global rag
    if rag is not None:
        return rag

    rag = SightRAG(
        store=os.getenv("SIGHTRAG_STORE", "sqlite"),
        domain_hint=os.getenv("SIGHTRAG_DOMAIN", None),
        index_path=os.getenv("SIGHTRAG_INDEX", "./sightrag_index"),
    )
    return rag
41
+
42
+
43
@app.get("/")
def root():
    """Describe the service: name, version, tagline and endpoint overview."""
    endpoints = {
        "POST /index/folder": "Index an image folder",
        "POST /index/video": "Index a video file",
        "POST /index/upload": "Upload and index images",
        "POST /query/text": "Search with text",
        "POST /query/reference": "Search with reference image",
        "GET /status": "Index status",
        "DELETE /index": "Clear index",
    }
    return {
        "name": "SightRAG API",
        "version": "0.1.0",
        "tagline": "See. Search. Retrieve.",
        "endpoints": endpoints,
    }
59
+
60
+
61
@app.get("/status")
def status():
    """Report the indexed region count plus the store and domain settings."""
    engine = get_rag()
    region_count = engine.count()
    return {
        "indexed_regions": region_count,
        "store": engine._store_type,
        "domain_hint": engine.domain_hint,
    }
69
+
70
+
71
@app.post("/index/folder")
def index_folder(path: str = Form(...)):
    """Index the server-side ``path`` and report the resulting region count.

    Any exception raised while indexing or counting is returned to the
    client as HTTP 400 with the exception text as the detail.
    """
    engine = get_rag()
    try:
        engine.index(path)
        payload = {
            "status": "success",
            "indexed_regions": engine.count(),
            "source": path,
        }
    except Exception as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    return payload
84
+
85
+
86
@app.post("/index/video")
def index_video(path: str = Form(...), fps: int = Form(1)):
    """Index the server-side video at ``path``, passing ``fps`` through.

    Any exception raised while indexing or counting is returned to the
    client as HTTP 400 with the exception text as the detail.
    """
    engine = get_rag()
    try:
        engine.index(path, fps=fps)
        payload = {
            "status": "success",
            "indexed_regions": engine.count(),
            "source": path,
            "fps": fps,
        }
    except Exception as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    return payload
100
+
101
+
102
@app.post("/index/upload")
async def index_upload(files: list[UploadFile] = File(...)):
    """Upload images and index them.

    Each upload is written into a private temporary directory, that
    directory is indexed, and it is removed again whether or not indexing
    succeeded.

    Client-supplied filenames are untrusted. Only the final path component
    is kept, so names such as ``../../x`` or ``/etc/x`` cannot be written
    outside the temporary directory. An upload without a usable name, or
    one repeating a name already used in this request, is stored under a
    numbered name so it neither fails nor overwrites an earlier upload.

    Returns:
        A dict with the status, the number of files uploaded and the total
        region count of the index.

    Raises:
        HTTPException: 400 with the error text if saving or indexing fails.
    """
    r = get_rag()
    upload_dir = tempfile.mkdtemp(prefix="sightrag_upload_")

    try:
        used_names = set()
        for position, f in enumerate(files):
            # Normalise Windows separators so basename() strips them as well.
            raw_name = (f.filename or "").replace("\\", "/")
            safe_name = os.path.basename(raw_name)
            if safe_name in ("", ".", ".."):
                safe_name = f"upload_{position}"
            # Prefixing keeps the extension intact for the indexer.
            while safe_name in used_names:
                safe_name = f"{position}_{safe_name}"
            used_names.add(safe_name)

            content = await f.read()
            with open(os.path.join(upload_dir, safe_name), "wb") as out:
                out.write(content)

        # NOTE(review): index() is synchronous and runs on the event loop
        # here; consider offloading it if indexing turns out to be slow.
        r.index(upload_dir)

        return {
            "status": "success",
            "files_uploaded": len(files),
            "indexed_regions": r.count()
        }
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    finally:
        shutil.rmtree(upload_dir, ignore_errors=True)
128
+
129
+
130
@app.post("/query/text")
def query_text(text: str = Form(...), top_k: int = Form(5)):
    """Run a text query and return the matches together with their count.

    Any exception raised by the query is returned to the client as HTTP 400
    with the exception text as the detail.
    """
    engine = get_rag()
    try:
        matches = engine.query(text=text, top_k=top_k)
        payload = {
            "query": text,
            "results": matches,
            "count": len(matches),
        }
    except Exception as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    return payload
143
+
144
+
145
@app.post("/query/reference")
async def query_reference(
    file: UploadFile = File(...),
    top_k: int = Form(5)
):
    """Search using an uploaded reference image.

    The upload is written to a temporary file, that file's path is passed to
    the query, and the file is removed afterwards.

    The client-supplied filename is untrusted, so only its extension is
    reused as the temporary file's suffix. A name containing path separators
    therefore cannot influence where the file is created, and a missing
    name simply yields no suffix.

    Returns:
        A dict with the original filename, the results and their count.

    Raises:
        HTTPException: 400 with the error text if saving or querying fails.
    """
    r = get_rag()

    # Normalise Windows separators so basename() strips them as well.
    client_name = (file.filename or "").replace("\\", "/")
    extension = os.path.splitext(os.path.basename(client_name))[1]

    tmp_path = None
    try:
        # The context manager closes the handle even if the read fails;
        # delete=False keeps the file on disk for the query below.
        with tempfile.NamedTemporaryFile(delete=False, suffix=extension) as tmp:
            tmp_path = tmp.name
            tmp.write(await file.read())

        results = r.query(reference=tmp_path, top_k=top_k)
        return {
            "reference": file.filename,
            "results": results,
            "count": len(results)
        }
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    finally:
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                # Best-effort cleanup must not replace the real response.
                pass
172
+
173
+
174
@app.delete("/index")
def clear_index():
    """Empty the index and confirm with a zero region count."""
    get_rag().clear()
    response = {"status": "cleared", "indexed_regions": 0}
    return response
180
+
181
+
182
def serve(host: str = "0.0.0.0", port: int = 8000):
    """Announce the bind address, then run the FastAPI app under uvicorn.

    Args:
        host: Interface address passed to uvicorn.
        port: TCP port passed to uvicorn.
    """
    banner = f"[SightRAG] Starting API server on {host}:{port}"
    print(banner)
    uvicorn.run(app, host=host, port=port)
186
+
187
+
188
+ if __name__ == "__main__":
189
+ serve()
sightrag/core.py ADDED
@@ -0,0 +1,129 @@
1
+ # sightrag/core.py
2
+ # Main SightRAG class
3
+ # All data stored in ~/.sightrag/ — project folder stays clean
4
+
5
+ import os
6
+ from pathlib import Path
7
+ from .detector import Detector
8
+ from .embedder import Embedder
9
+ from .indexer import Indexer
10
+ from .retriever import Retriever
11
+
12
+ SIGHTRAG_HOME = os.path.join(Path.home(), ".sightrag")
13
+
14
+
15
class SightRAG:
    """
    SightRAG — Image and Video RAG.
    See. Search. Retrieve.

    Wires a Detector, an Embedder, a vector store, an Indexer and a
    Retriever together behind one object.

    Usage:
        rag = SightRAG()
        rag.index("./photos/")
        results = rag.query("find empty shelf")
    """

    def __init__(self,
                 store: str = "sqlite",
                 domain_hint: str = None,
                 index_path: str = None):
        """Create the pipeline components.

        Args:
            store: Backend name, "sqlite" or "chroma".
            domain_hint: Optional hint handed to the Indexer and Retriever.
            index_path: Path handed to the store; defaults to
                ``<SIGHTRAG_HOME>/index``.

        Raises:
            ValueError: If ``store`` is not a known backend.
        """
        self.domain_hint = domain_hint
        self._store_type = store

        if index_path is None:
            self._index_path = os.path.join(SIGHTRAG_HOME, "index")
        else:
            self._index_path = index_path

        os.makedirs(SIGHTRAG_HOME, exist_ok=True)

        print("[SightRAG] Initializing...")
        self._detector = Detector()
        self._embedder = Embedder()
        self._store = self._init_store(store, self._index_path)
        self._indexer = Indexer(
            self._detector, self._embedder,
            self._store, domain_hint
        )
        self._retriever = Retriever(
            self._embedder, self._detector,
            self._store, domain_hint
        )
        print("[SightRAG] Ready.")

    def _init_store(self, store_type: str, path: str):
        """Build the store backend, falling back to SQLite without ChromaDB.

        Raises:
            ValueError: If ``store_type`` is neither "chroma" nor "sqlite".
        """
        if store_type == "chroma":
            try:
                from .store.chroma_store import ChromaStore
                return ChromaStore(path)
            except ImportError:
                print("[SightRAG] ChromaDB not found. Using SQLite.")
                from .store.sqlite_store import SQLiteStore
                # Record the backend actually in use so repr() and status
                # output do not keep reporting "chroma".
                self._store_type = "sqlite"
                return SQLiteStore(path)
        elif store_type == "sqlite":
            from .store.sqlite_store import SQLiteStore
            return SQLiteStore(path)
        else:
            raise ValueError(f"Unknown store: {store_type}. Use 'chroma' or 'sqlite'.")

    def index(self, path: str = None, source: str = None,
              camera_id: int = 0, fps: int = 1):
        """Index images, video, or camera.

        Args:
            path: Folder, video file or single image file to index.
            source: Pass "camera" to index live camera frames instead.
            camera_id: Camera to read when ``source == "camera"``.
            fps: Frame rate handed to folder, video and camera indexing.

        Returns:
            This instance, so calls can be chained.

        Raises:
            ValueError: If neither ``path`` nor ``source="camera"`` is given.
            FileNotFoundError: If ``path`` is neither a folder nor a file.
        """
        if source == "camera":
            self._indexer.index_camera(camera_id=camera_id, fps=fps)
            return self

        if path is None:
            raise ValueError("Provide a path or source='camera'")

        if os.path.isdir(path):
            self._indexer.index_folder(path, fps=fps)
        elif os.path.isfile(path):
            ext = os.path.splitext(path)[1].lower()
            if ext in {".mp4", ".avi", ".mov", ".mkv"}:
                self._indexer.index_video(path, fps=fps)
            else:
                from .utils.image import load_image
                image = load_image(path)
                self._index_single_image(path, image)
                print(f"[SightRAG] 1 image indexed. Total: {self.count()} regions.")
        else:
            raise FileNotFoundError(f"Path not found: {path}")

        return self

    def _index_single_image(self, path, image):
        """Index one image with detection + embedding.

        Region ids are ``<file stem>_<region number>``, the same scheme the
        Indexer uses for folder indexing. A constant prefix would give every
        singly indexed image the same ids as the previous one.
        """
        import numpy as np
        prefix = Path(path).stem
        regions = self._detector.detect(image)
        for j, region in enumerate(regions):
            embedding = self._embedder.embed_image(region["crop"])
            # An all-zero embedding carries no signal; do not store it.
            if not np.allclose(embedding, 0):
                self._store.add(f"{prefix}_{j}", embedding, {
                    "image_path": str(path),
                    "bbox": region["bbox"],
                    "label": region["label"],
                    "confidence": region["confidence"],
                    "source_type": "image"
                })

    def query(self, text: str = None, reference: str = None, top_k: int = 5):
        """Search indexed content with text or reference image.

        A non-empty ``text`` takes precedence; otherwise ``reference`` is
        used.

        Raises:
            ValueError: If ``text`` is missing or empty and no ``reference``
                is given.
        """
        if text:
            return self._retriever.query_text(text, top_k)
        # Empty text must not fall through to a reference query with None.
        if reference is None:
            raise ValueError("Provide text or reference image.")
        return self._retriever.query_reference(reference, top_k)

    def count(self) -> int:
        """Return the store's entry count."""
        return self._store.count()

    def clear(self):
        """Clear the store and return this instance."""
        self._store.clear()
        print("[SightRAG] Index cleared.")
        return self

    def __repr__(self):
        return f"SightRAG(store='{self._store_type}', indexed={self.count()} regions)"
sightrag/detector.py ADDED
@@ -0,0 +1,81 @@
1
+ # sightrag/detector.py
2
+ # YOLO detection — models stored in ~/.sightrag/models/
3
+
4
+ import os
5
+ import numpy as np
6
+ from PIL import Image
7
+ from pathlib import Path
8
+
9
+
10
+ MODEL_DIR = os.path.join(Path.home(), ".sightrag", "models")
11
+
12
+
13
class Detector:
    """YOLO object detector with whole-image fallback."""

    def __init__(self, model_size: str = "yolo11n.pt", model_dir: str = None):
        """Prepare the model directory and try to load the model.

        Args:
            model_size: Weights file name, looked up inside ``model_dir``.
            model_dir: Directory holding the weights; defaults to MODEL_DIR.
        """
        self.model = None
        self.model_size = model_size
        self.model_dir = model_dir or MODEL_DIR
        os.makedirs(self.model_dir, exist_ok=True)
        self._load()

    def _load(self):
        """Load YOLO, leaving ``self.model`` as None if it is unavailable.

        Relocating a freshly fetched weights file is best-effort. A failure
        there is reported but does not discard the already loaded model.
        """
        try:
            from ultralytics import YOLO
            import logging
            logging.getLogger("ultralytics").setLevel(logging.WARNING)

            model_path = os.path.join(self.model_dir, self.model_size)

            if os.path.exists(model_path):
                self.model = YOLO(model_path)
                return

            # Not cached yet: load by name. Presumably ultralytics downloads
            # the weights into the current directory — TODO confirm.
            self.model = YOLO(self.model_size)

        except Exception as e:
            print(f"[SightRAG] YOLO not available: {str(e)[:100]}")
            self.model = None
            return

        # Move the .pt file from the current directory into model_dir.
        cwd_model = os.path.join(os.getcwd(), self.model_size)
        if os.path.exists(cwd_model) and cwd_model != model_path:
            try:
                import shutil
                shutil.move(cwd_model, model_path)
            except OSError as e:
                print(f"[SightRAG] Could not move {self.model_size}: {str(e)[:100]}")

    def detect(self, image: Image.Image, confidence: float = 0.25):
        """Detect objects. Always returns at least whole image.

        Args:
            image: Image to analyse.
            confidence: Threshold passed to the model as ``conf``.

        Returns:
            A list of dicts with "crop", "bbox" ([x1, y1, x2, y2]), "label"
            and "confidence". The last entry is always the whole image.
        """
        regions = []
        w, h = image.size

        if self.model is not None:
            try:
                results = self.model(image, conf=confidence, verbose=False)
                for result in results:
                    if result.boxes is None or len(result.boxes) == 0:
                        continue
                    for box in result.boxes:
                        x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
                        # Clamp the box to the image bounds.
                        x1, y1 = max(0, x1), max(0, y1)
                        x2, y2 = min(w, x2), min(h, y2)
                        # Drop boxes narrower or shorter than 10 pixels.
                        if (x2 - x1) < 10 or (y2 - y1) < 10:
                            continue
                        regions.append({
                            "crop": image.crop((x1, y1, x2, y2)),
                            "bbox": [x1, y1, x2, y2],
                            "label": result.names[int(box.cls[0])],
                            "confidence": float(box.conf[0])
                        })
            except Exception as e:
                # Detection is best-effort: report the failure and keep the
                # regions found so far. Ctrl+C is no longer swallowed here.
                print(f"[SightRAG] Detection failed: {str(e)[:100]}")

        # Always add whole image
        regions.append({
            "crop": image,
            "bbox": [0, 0, w, h],
            "label": "whole_image",
            "confidence": 1.0
        })

        return regions
sightrag/embedder.py ADDED
@@ -0,0 +1,83 @@
1
+ # sightrag/embedder.py
2
+ # CLIP embedder — works across all transformers versions
3
+
4
+ import os
5
+ import numpy as np
6
+ from PIL import Image
7
+
8
+ os.environ["TRANSFORMERS_VERBOSITY"] = "error"
9
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"
10
+
11
+
12
class Embedder:
    """CLIP embedder for images and text queries."""

    def __init__(self, model_name: str = "openai/clip-vit-base-patch32"):
        """Load the CLIP model and processor named ``model_name``."""
        self.model = None
        self.processor = None
        self.embed_dim = None
        self._load(model_name)

    def _load(self, model_name):
        """Load model and processor and record the projection dimension.

        Warnings are silenced only while loading. The previous filters are
        restored afterwards so the host application's warning configuration
        is not changed permanently.
        """
        import warnings

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")

            from transformers import CLIPModel, CLIPProcessor
            self.model = CLIPModel.from_pretrained(model_name)
            self.processor = CLIPProcessor.from_pretrained(model_name)

        self.model.eval()
        self.embed_dim = self.model.config.projection_dim

    @staticmethod
    def _to_unit_vector(projected) -> np.ndarray:
        """Return the first row of ``projected`` as an L2-normalised float32 array.

        A zero vector is returned unchanged to avoid dividing by zero.
        """
        emb = projected[0].detach().cpu().numpy().astype(np.float32)
        norm = np.linalg.norm(emb)
        return emb / norm if norm > 0 else emb

    def embed_image(self, image: Image.Image) -> np.ndarray:
        """Embed image → fixed-size normalized vector.

        Returns:
            The normalised embedding, or an all-zero vector of length
            ``embed_dim`` if embedding fails.
        """
        import torch
        try:
            if image.mode != "RGB":
                image = image.convert("RGB")

            # Get pixel values
            inputs = self.processor(images=image, return_tensors="pt", padding=True)
            pixel_values = inputs["pixel_values"]

            with torch.no_grad():
                # Use vision model + projection explicitly
                # This guarantees correct output dim across all versions
                vision_out = self.model.vision_model(pixel_values=pixel_values)
                pooled = vision_out.pooler_output  # (1, hidden_dim)
                projected = self.model.visual_projection(pooled)  # (1, projection_dim)

            return self._to_unit_vector(projected)

        except Exception as e:
            print(f"[SightRAG] Image embed error: {e}")
            return np.zeros(self.embed_dim, dtype=np.float32)

    def embed_text(self, text: str, domain_hint: str = None) -> np.ndarray:
        """Embed text query → fixed-size normalized vector.

        Args:
            text: Query text.
            domain_hint: Optional text appended to the query after a space.

        Returns:
            The normalised embedding, or an all-zero vector of length
            ``embed_dim`` if embedding fails.
        """
        import torch
        try:
            query = f"{text} {domain_hint}" if domain_hint else text

            inputs = self.processor(
                text=[query], return_tensors="pt",
                padding=True, truncation=True
            )

            with torch.no_grad():
                # Use text model + projection explicitly
                text_out = self.model.text_model(
                    input_ids=inputs["input_ids"],
                    attention_mask=inputs["attention_mask"]
                )
                pooled = text_out.pooler_output  # (1, hidden_dim)
                projected = self.model.text_projection(pooled)  # (1, projection_dim)

            return self._to_unit_vector(projected)

        except Exception as e:
            print(f"[SightRAG] Text embed error: {e}")
            return np.zeros(self.embed_dim, dtype=np.float32)
sightrag/indexer.py ADDED
@@ -0,0 +1,147 @@
1
+ # sightrag/indexer.py
2
+ # Image, video, camera indexing — clean output only
3
+
4
+ import os
5
+ import numpy as np
6
+ from pathlib import Path
7
+ from .detector import Detector
8
+ from .embedder import Embedder
9
+ from .utils.image import load_image, SUPPORTED_FORMATS as IMAGE_FORMATS
10
+ from .utils.video import extract_frames, SUPPORTED_FORMATS as VIDEO_FORMATS
11
+
12
+
13
class Indexer:
    """Indexes images, videos, and camera frames."""

    def __init__(self, detector, embedder, store, domain_hint=None):
        """Keep references to the detector, embedder, store and domain hint."""
        self.detector = detector
        self.embedder = embedder
        self.store = store
        self.domain_hint = domain_hint

    def _store_regions(self, image, make_id, image_path, source_type, **extra):
        """Detect regions in ``image``, embed each one and add it to the store.

        Args:
            image: Image or frame to process.
            make_id: Callable mapping the region number to its store id.
            image_path: Value recorded under "image_path".
            source_type: Value recorded under "source_type".
            **extra: Additional metadata entries (e.g. ``timestamp``).

        Returns:
            The number of regions stored.
        """
        stored = 0
        for j, region in enumerate(self.detector.detect(image)):
            embedding = self.embedder.embed_image(region["crop"])
            # An all-zero embedding carries no signal; do not store it.
            if np.allclose(embedding, 0):
                continue
            metadata = {
                "image_path": image_path,
                "bbox": region["bbox"],
                "label": region["label"],
                "confidence": region["confidence"],
                **extra,
                "source_type": source_type,
            }
            self.store.add(make_id(j), embedding, metadata)
            stored += 1
        return stored

    @staticmethod
    def _find_files(folder, formats):
        """Return the sorted, de-duplicated files in ``folder`` matching ``formats``.

        Each format is matched as written and upper-cased.
        """
        found = set()
        for fmt in formats:
            found.update(folder.glob(f"*{fmt}"))
            found.update(folder.glob(f"*{fmt.upper()}"))
        return sorted(found)

    @staticmethod
    def _print_progress(done, total, unit):
        """Redraw a 40-character progress bar on the current line."""
        pct = int((done / total) * 40)
        bar = "█" * pct + "░" * (40 - pct)
        print(f"\r [{bar}] {done}/{total} {unit}", end="", flush=True)

    def _index_image(self, path_str, image, prefix):
        """Index one image — detect regions, embed, store.

        Region ids are ``<prefix>_<region number>``.

        Returns:
            The number of regions stored.
        """
        return self._store_regions(
            image, lambda j: f"{prefix}_{j}", path_str, "image"
        )

    def index_folder(self, folder_path: str, fps: int = 1):
        """Index all images AND videos in a folder.

        Files that fail to index are reported and skipped.

        Raises:
            FileNotFoundError: If ``folder_path`` does not exist.
            ValueError: If it is not a folder, or holds no images or videos.
        """
        folder = Path(folder_path)
        if not folder.exists():
            raise FileNotFoundError(f"Folder not found: {folder}")
        if not folder.is_dir():
            raise ValueError(f"Not a folder: {folder}")

        image_paths = self._find_files(folder, IMAGE_FORMATS)
        video_paths = self._find_files(folder, VIDEO_FORMATS)

        if not image_paths and not video_paths:
            raise ValueError(f"No images or videos in {folder}")

        print(f"[SightRAG] Found {len(image_paths)} images, {len(video_paths)} videos")

        # Index images
        if image_paths:
            total = len(image_paths)
            for i, path in enumerate(image_paths, 1):
                try:
                    image = load_image(str(path))
                    self._index_image(str(path), image, path.stem)
                    self._print_progress(i, total, "images")
                except Exception as e:
                    print(f"\n Skipping {path.name}: {e}")
            print()

        # Index videos
        if video_paths:
            for v_idx, vpath in enumerate(video_paths, 1):
                try:
                    print(f"[SightRAG] Video {v_idx}/{len(video_paths)}: {vpath.name}")
                    self._index_video(str(vpath), fps)
                except Exception as e:
                    print(f" Skipping {vpath.name}: {e}")

        print(f"[SightRAG] Done. {self.store.count()} regions indexed.")

    def index_video(self, video_path: str, fps: int = 1):
        """Index a single video file."""
        self._index_video(video_path, fps)
        print(f"[SightRAG] Done. {self.store.count()} regions indexed.")

    def _index_video(self, video_path: str, fps: int = 1):
        """Internal video indexing: extract frames, then index each one."""
        video_name = Path(video_path).stem
        frames = extract_frames(video_path, fps=fps)
        print(f" {len(frames)} frames extracted...")
        self._index_frames(frames, video_name, video_path)

    def _index_frames(self, frames, video_name, video_path):
        """Index ``(image, timestamp)`` frames of one video.

        Region ids are ``<video_name>_f<frame number>_r<region number>``.
        A frame that fails is reported and skipped. Ctrl+C is not swallowed,
        so a long run can still be interrupted.
        """
        total = len(frames)
        for i, (image, timestamp) in enumerate(frames, 1):
            try:
                self._store_regions(
                    image,
                    lambda j, i=i: f"{video_name}_f{i}_r{j}",
                    video_path,
                    "video",
                    timestamp=timestamp,
                )
                self._print_progress(i, total, "frames")
            except Exception as e:
                print(f"\n Skipping frame {i}: {e}")
        print()

    def index_camera(self, camera_id=0, fps=1, buffer_seconds=60):
        """Index live camera frames until capture ends or Ctrl+C is pressed.

        Args:
            camera_id: Camera to read; also part of each region id.
            fps: Forwarded to ``capture_frames``.
            buffer_seconds: Forwarded to ``capture_frames``.
        """
        from .utils.camera import capture_frames
        print(f"[SightRAG] Camera {camera_id}. Press Ctrl+C to stop.")

        count = 0
        try:
            for image, timestamp in capture_frames(camera_id, fps, buffer_seconds):
                self._store_regions(
                    image,
                    lambda j, ts=timestamp: f"cam{camera_id}_{ts}_{j}",
                    f"camera_{camera_id}",
                    "camera",
                    timestamp=timestamp,
                )
                count += 1
                print(f"\r[SightRAG] {count} frames | {timestamp}", end="", flush=True)
        except KeyboardInterrupt:
            print(f"\n[SightRAG] Stopped. {self.store.count()} regions indexed.")