superlocalmemory 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ATTRIBUTION.md +140 -0
- package/CHANGELOG.md +1749 -0
- package/LICENSE +21 -0
- package/README.md +600 -0
- package/bin/aider-smart +72 -0
- package/bin/slm +202 -0
- package/bin/slm-npm +73 -0
- package/bin/slm.bat +195 -0
- package/bin/slm.cmd +10 -0
- package/bin/superlocalmemoryv2:list +3 -0
- package/bin/superlocalmemoryv2:profile +3 -0
- package/bin/superlocalmemoryv2:recall +3 -0
- package/bin/superlocalmemoryv2:remember +3 -0
- package/bin/superlocalmemoryv2:reset +3 -0
- package/bin/superlocalmemoryv2:status +3 -0
- package/completions/slm.bash +58 -0
- package/completions/slm.zsh +76 -0
- package/configs/antigravity-mcp.json +13 -0
- package/configs/chatgpt-desktop-mcp.json +7 -0
- package/configs/claude-desktop-mcp.json +15 -0
- package/configs/codex-mcp.toml +13 -0
- package/configs/cody-commands.json +29 -0
- package/configs/continue-mcp.yaml +14 -0
- package/configs/continue-skills.yaml +26 -0
- package/configs/cursor-mcp.json +15 -0
- package/configs/gemini-cli-mcp.json +11 -0
- package/configs/jetbrains-mcp.json +11 -0
- package/configs/opencode-mcp.json +12 -0
- package/configs/perplexity-mcp.json +9 -0
- package/configs/vscode-copilot-mcp.json +12 -0
- package/configs/windsurf-mcp.json +16 -0
- package/configs/zed-mcp.json +12 -0
- package/docs/ARCHITECTURE.md +877 -0
- package/docs/CLI-COMMANDS-REFERENCE.md +425 -0
- package/docs/COMPETITIVE-ANALYSIS.md +210 -0
- package/docs/COMPRESSION-README.md +390 -0
- package/docs/GRAPH-ENGINE.md +503 -0
- package/docs/MCP-MANUAL-SETUP.md +720 -0
- package/docs/MCP-TROUBLESHOOTING.md +787 -0
- package/docs/PATTERN-LEARNING.md +363 -0
- package/docs/PROFILES-GUIDE.md +453 -0
- package/docs/RESET-GUIDE.md +353 -0
- package/docs/SEARCH-ENGINE-V2.2.0.md +748 -0
- package/docs/SEARCH-INTEGRATION-GUIDE.md +502 -0
- package/docs/UI-SERVER.md +254 -0
- package/docs/UNIVERSAL-INTEGRATION.md +432 -0
- package/docs/V2.2.0-OPTIONAL-SEARCH.md +666 -0
- package/docs/WINDOWS-INSTALL-README.txt +34 -0
- package/docs/WINDOWS-POST-INSTALL.txt +45 -0
- package/docs/example_graph_usage.py +148 -0
- package/hooks/memory-list-skill.js +130 -0
- package/hooks/memory-profile-skill.js +284 -0
- package/hooks/memory-recall-skill.js +109 -0
- package/hooks/memory-remember-skill.js +127 -0
- package/hooks/memory-reset-skill.js +274 -0
- package/install-skills.sh +436 -0
- package/install.ps1 +417 -0
- package/install.sh +755 -0
- package/mcp_server.py +585 -0
- package/package.json +94 -0
- package/requirements-core.txt +24 -0
- package/requirements.txt +10 -0
- package/scripts/postinstall.js +126 -0
- package/scripts/preuninstall.js +57 -0
- package/skills/slm-build-graph/SKILL.md +423 -0
- package/skills/slm-list-recent/SKILL.md +348 -0
- package/skills/slm-recall/SKILL.md +325 -0
- package/skills/slm-remember/SKILL.md +194 -0
- package/skills/slm-status/SKILL.md +363 -0
- package/skills/slm-switch-profile/SKILL.md +442 -0
- package/src/__pycache__/cache_manager.cpython-312.pyc +0 -0
- package/src/__pycache__/embedding_engine.cpython-312.pyc +0 -0
- package/src/__pycache__/graph_engine.cpython-312.pyc +0 -0
- package/src/__pycache__/hnsw_index.cpython-312.pyc +0 -0
- package/src/__pycache__/hybrid_search.cpython-312.pyc +0 -0
- package/src/__pycache__/memory-profiles.cpython-312.pyc +0 -0
- package/src/__pycache__/memory-reset.cpython-312.pyc +0 -0
- package/src/__pycache__/memory_compression.cpython-312.pyc +0 -0
- package/src/__pycache__/memory_store_v2.cpython-312.pyc +0 -0
- package/src/__pycache__/migrate_v1_to_v2.cpython-312.pyc +0 -0
- package/src/__pycache__/pattern_learner.cpython-312.pyc +0 -0
- package/src/__pycache__/query_optimizer.cpython-312.pyc +0 -0
- package/src/__pycache__/search_engine_v2.cpython-312.pyc +0 -0
- package/src/__pycache__/setup_validator.cpython-312.pyc +0 -0
- package/src/__pycache__/tree_manager.cpython-312.pyc +0 -0
- package/src/cache_manager.py +520 -0
- package/src/embedding_engine.py +671 -0
- package/src/graph_engine.py +970 -0
- package/src/hnsw_index.py +626 -0
- package/src/hybrid_search.py +693 -0
- package/src/memory-profiles.py +518 -0
- package/src/memory-reset.py +485 -0
- package/src/memory_compression.py +999 -0
- package/src/memory_store_v2.py +1088 -0
- package/src/migrate_v1_to_v2.py +638 -0
- package/src/pattern_learner.py +898 -0
- package/src/query_optimizer.py +513 -0
- package/src/search_engine_v2.py +403 -0
- package/src/setup_validator.py +479 -0
- package/src/tree_manager.py +720 -0
package/src/embedding_engine.py

@@ -0,0 +1,671 @@
#!/usr/bin/env python3
"""
EmbeddingEngine - Local Embedding Generation for SuperLocalMemory V2

Copyright (c) 2026 Varun Pratap Bhardwaj
Licensed under MIT License
Repository: https://github.com/varun369/SuperLocalMemoryV2

Implements local embedding generation using sentence-transformers:
- all-MiniLM-L6-v2 model (384 dimensions, 80MB)
- Batch processing for efficiency
- GPU acceleration with automatic detection
- Disk caching for repeated queries
- Graceful fallback to TF-IDF if unavailable

All processing is local - no external APIs required.

LIMITS:
- MAX_BATCH_SIZE: 128 (prevents memory exhaustion)
- MAX_TEXT_LENGTH: 10,000 characters per input
- CACHE_MAX_SIZE: 10,000 entries (LRU eviction)
"""

# SECURITY: Embedding generation limits to prevent resource exhaustion
MAX_BATCH_SIZE = 128
MAX_TEXT_LENGTH = 10_000
CACHE_MAX_SIZE = 10_000

import sqlite3
import json
import time
import logging
import hashlib
from datetime import datetime
from pathlib import Path
from typing import List, Dict, Optional, Union, Tuple
from collections import OrderedDict
import numpy as np

# Optional sentence-transformers dependency
SENTENCE_TRANSFORMERS_AVAILABLE = False
try:
    from sentence_transformers import SentenceTransformer
    SENTENCE_TRANSFORMERS_AVAILABLE = True
except ImportError:
    SENTENCE_TRANSFORMERS_AVAILABLE = False
    # Graceful degradation - will use TF-IDF fallback

# Fallback: TF-IDF vectorization
SKLEARN_AVAILABLE = False
try:
    from sklearn.feature_extraction.text import TfidfVectorizer
    SKLEARN_AVAILABLE = True
except ImportError:
    SKLEARN_AVAILABLE = False

# GPU detection
TORCH_AVAILABLE = False
CUDA_AVAILABLE = False
MPS_AVAILABLE = False  # Apple Silicon

try:
    import torch
    TORCH_AVAILABLE = True
    CUDA_AVAILABLE = torch.cuda.is_available()
    MPS_AVAILABLE = hasattr(torch.backends, 'mps') and torch.backends.mps.is_available()
except ImportError:
    pass

MEMORY_DIR = Path.home() / ".claude-memory"
EMBEDDING_CACHE_PATH = MEMORY_DIR / "embedding_cache.json"
MODEL_CACHE_PATH = MEMORY_DIR / "models"  # Local model storage

logger = logging.getLogger(__name__)


class LRUCache:
    """Simple LRU cache for embeddings."""

    def __init__(self, max_size: int = CACHE_MAX_SIZE):
        self.cache = OrderedDict()
        self.max_size = max_size

    def get(self, key: str) -> Optional[np.ndarray]:
        """Get item from cache, moving to end (most recent)."""
        if key not in self.cache:
            return None

        # Move to end (most recently used)
        self.cache.move_to_end(key)
        return np.array(self.cache[key])

    def set(self, key: str, value: np.ndarray):
        """Set item in cache, evicting oldest if full."""
        if key in self.cache:
            # Update existing
            self.cache.move_to_end(key)
            self.cache[key] = value.tolist()
        else:
            # Add new
            if len(self.cache) >= self.max_size:
                # Evict oldest
                self.cache.popitem(last=False)
            self.cache[key] = value.tolist()

    def save(self, path: Path):
        """Save cache to disk."""
        try:
            path.parent.mkdir(parents=True, exist_ok=True)
            with open(path, 'w') as f:
                json.dump(dict(self.cache), f)
            logger.debug(f"Saved {len(self.cache)} cached embeddings")
        except Exception as e:
            logger.error(f"Failed to save embedding cache: {e}")

    def load(self, path: Path):
        """Load cache from disk."""
        if not path.exists():
            return

        try:
            with open(path, 'r') as f:
                data = json.load(f)
            self.cache = OrderedDict(data)
            logger.info(f"Loaded {len(self.cache)} cached embeddings")
        except Exception as e:
            logger.error(f"Failed to load embedding cache: {e}")
            self.cache = OrderedDict()
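
A minimal sketch of the LRU semantics implemented above (illustrative, not part of the package): get() refreshes an entry's recency, and set() evicts the oldest entry once max_size is reached.

import numpy as np

cache = LRUCache(max_size=2)          # LRUCache as defined above
cache.set("a", np.array([1.0]))
cache.set("b", np.array([2.0]))
cache.get("a")                        # "a" becomes most recently used
cache.set("c", np.array([3.0]))       # evicts "b", the least recently used
assert cache.get("b") is None
assert cache.get("a") is not None
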
class EmbeddingEngine:
    """
    Local embedding generation using sentence-transformers.

    Features:
    - all-MiniLM-L6-v2 model (384 dimensions, 80MB, fast)
    - Batch processing for efficiency (up to 128 texts)
    - GPU acceleration (CUDA/MPS) with automatic detection
    - LRU cache for repeated queries (10K entries)
    - Graceful fallback to TF-IDF if dependencies unavailable

    Performance:
    - CPU: ~100 embeddings/sec
    - GPU (CUDA): ~1000 embeddings/sec
    - Apple Silicon (MPS): ~500 embeddings/sec
    - Cache hit: ~0.001ms
    """

    def __init__(
        self,
        model_name: str = "all-MiniLM-L6-v2",
        device: Optional[str] = None,
        cache_path: Optional[Path] = None,
        model_cache_path: Optional[Path] = None,
        use_cache: bool = True
    ):
        """
        Initialize embedding engine.

        Args:
            model_name: Sentence transformer model name (default: all-MiniLM-L6-v2)
            device: Device to use ('cuda', 'mps', 'cpu', or None for auto)
            cache_path: Custom path for embedding cache
            model_cache_path: Custom path for model storage
            use_cache: Whether to use LRU cache

        Available models:
        - all-MiniLM-L6-v2: 384 dim, 80MB, fast, recommended
        - all-mpnet-base-v2: 768 dim, 420MB, more accurate
        - paraphrase-multilingual: 384 dim, 420MB, multilingual
        """
        self.model_name = model_name
        self.cache_path = cache_path or EMBEDDING_CACHE_PATH
        self.model_cache_path = model_cache_path or MODEL_CACHE_PATH
        self.use_cache = use_cache

        # Auto-detect device
        if device is None:
            if CUDA_AVAILABLE:
                device = 'cuda'
                logger.info("Using CUDA GPU acceleration")
            elif MPS_AVAILABLE:
                device = 'mps'
                logger.info("Using Apple Silicon (MPS) GPU acceleration")
            else:
                device = 'cpu'
                logger.info("Using CPU (consider GPU for faster processing)")
        self.device = device

        # Initialize model
        self.model = None
        self.dimension = 384  # Default for all-MiniLM-L6-v2
        self.use_transformers = SENTENCE_TRANSFORMERS_AVAILABLE

        # Initialize cache
        self.cache = LRUCache(max_size=CACHE_MAX_SIZE) if use_cache else None

        # Load cache from disk
        if self.cache:
            self.cache.load(self.cache_path)

        # Fallback: TF-IDF vectorizer
        self.tfidf_vectorizer = None
        self.tfidf_fitted = False

        # Load model
        self._load_model()

    def _load_model(self):
        """Load sentence transformer model or fallback to TF-IDF."""
        if not self.use_transformers:
            logger.warning(
                "sentence-transformers unavailable. Install with: "
                "pip install sentence-transformers"
            )
            self._init_fallback()
            return

        try:
            # Create model cache directory
            self.model_cache_path.mkdir(parents=True, exist_ok=True)

            logger.info(f"Loading model: {self.model_name}")
            start_time = time.time()

            # Load model with local cache
            self.model = SentenceTransformer(
                self.model_name,
                device=self.device,
                cache_folder=str(self.model_cache_path)
            )

            # Get actual dimension
            self.dimension = self.model.get_sentence_embedding_dimension()

            elapsed = time.time() - start_time
            logger.info(
                f"Loaded {self.model_name} ({self.dimension}D) in {elapsed:.2f}s"
            )

        except Exception as e:
            logger.error(f"Failed to load sentence transformer: {e}")
            logger.info("Falling back to TF-IDF")
            self.use_transformers = False
            self._init_fallback()

    def _init_fallback(self):
        """Initialize TF-IDF fallback."""
        if not SKLEARN_AVAILABLE:
            logger.error(
                "sklearn unavailable - no fallback available. "
                "Install: pip install scikit-learn"
            )
            return

        logger.info("Using TF-IDF fallback (dimension will be dynamic)")
        self.tfidf_vectorizer = TfidfVectorizer(
            max_features=384,  # Match sentence transformer dimension
            stop_words='english',
            ngram_range=(1, 2),
            min_df=1
        )
        self.dimension = 384

    def _get_cache_key(self, text: str) -> str:
        """Generate cache key for text."""
        return hashlib.sha256(text.encode('utf-8')).hexdigest()[:32]

    def encode(
        self,
        texts: Union[str, List[str]],
        batch_size: int = 32,
        show_progress: bool = False,
        normalize: bool = True
    ) -> np.ndarray:
        """
        Generate embeddings for text(s).

        Args:
            texts: Single text or list of texts
            batch_size: Batch size for processing (max: 128)
            show_progress: Show progress bar for large batches
            normalize: Normalize embeddings to unit length

        Returns:
            Array of shape (n_texts, dimension) or (dimension,) for single text

        Raises:
            ValueError: If input validation fails
        """
        # Convert single text to list
        single_input = isinstance(texts, str)
        if single_input:
            texts = [texts]

        # SECURITY: Input validation
        if len(texts) == 0:
            return np.array([])

        batch_size = min(batch_size, MAX_BATCH_SIZE)

        # Validate text length
        for i, text in enumerate(texts):
            if not isinstance(text, str):
                raise ValueError(f"Text at index {i} is not a string")
            if len(text) > MAX_TEXT_LENGTH:
                logger.warning(f"Text {i} truncated from {len(text)} to {MAX_TEXT_LENGTH} chars")
                texts[i] = text[:MAX_TEXT_LENGTH]

        # Check cache for hits
        embeddings = []
        uncached_texts = []
        uncached_indices = []

        if self.cache:
            for i, text in enumerate(texts):
                cache_key = self._get_cache_key(text)
                cached = self.cache.get(cache_key)

                if cached is not None:
                    embeddings.append((i, cached))
                else:
                    uncached_texts.append(text)
                    uncached_indices.append(i)
        else:
            uncached_texts = texts
            uncached_indices = list(range(len(texts)))

        # Generate embeddings for uncached texts
        if uncached_texts:
            if self.use_transformers and self.model:
                # Use sentence transformer
                uncached_embeddings = self._encode_transformer(
                    uncached_texts,
                    batch_size=batch_size,
                    show_progress=show_progress
                )
            elif self.tfidf_vectorizer:
                # Use TF-IDF fallback
                uncached_embeddings = self._encode_tfidf(uncached_texts)
            else:
                raise RuntimeError("No embedding method available")

            # Add to cache and results
            for i, text, embedding in zip(uncached_indices, uncached_texts, uncached_embeddings):
                if self.cache:
                    cache_key = self._get_cache_key(text)
                    self.cache.set(cache_key, embedding)
                embeddings.append((i, embedding))

        # Sort by original index and extract embeddings
        embeddings.sort(key=lambda x: x[0])
        result = np.array([emb for _, emb in embeddings])

        # Normalize if requested
        if normalize and len(result) > 0:
            norms = np.linalg.norm(result, axis=1, keepdims=True)
            norms[norms == 0] = 1  # Avoid division by zero
            result = result / norms

        # Return single embedding if single input
        if single_input:
            return result[0]

        return result
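
A minimal usage sketch of the encode() contract above (illustrative, not part of the package): a single string yields a 1-D vector, a list yields a 2-D array, repeated inputs are served from the LRU cache, and normalize=True (the default) scales rows to unit length.

import numpy as np

engine = EmbeddingEngine()                     # auto-detects cuda/mps/cpu

vec = engine.encode("local-first memory")      # shape (384,) with the default model
mat = engine.encode(["note one", "note two"])  # shape (2, 384)

# A repeated input is answered from the LRU cache rather than re-encoded
vec_again = engine.encode("local-first memory")

# With normalize=True, non-degenerate rows come back unit-length
assert abs(np.linalg.norm(vec) - 1.0) < 1e-5
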
    def _encode_transformer(
        self,
        texts: List[str],
        batch_size: int,
        show_progress: bool
    ) -> np.ndarray:
        """Generate embeddings using sentence transformer."""
        try:
            start_time = time.time()

            embeddings = self.model.encode(
                texts,
                batch_size=batch_size,
                show_progress_bar=show_progress,
                convert_to_numpy=True,
                normalize_embeddings=False  # We'll normalize separately
            )

            elapsed = time.time() - start_time
            rate = len(texts) / elapsed if elapsed > 0 else 0
            logger.debug(f"Encoded {len(texts)} texts in {elapsed:.2f}s ({rate:.0f} texts/sec)")

            return embeddings

        except Exception as e:
            logger.error(f"Transformer encoding failed: {e}")
            raise

    def _encode_tfidf(self, texts: List[str]) -> np.ndarray:
        """Generate embeddings using TF-IDF fallback."""
        try:
            if not self.tfidf_fitted:
                # Fit on first use
                logger.info("Fitting TF-IDF vectorizer...")
                self.tfidf_vectorizer.fit(texts)
                self.tfidf_fitted = True

            embeddings = self.tfidf_vectorizer.transform(texts).toarray()

            # Pad or truncate to target dimension
            if embeddings.shape[1] < self.dimension:
                padding = np.zeros((embeddings.shape[0], self.dimension - embeddings.shape[1]))
                embeddings = np.hstack([embeddings, padding])
            elif embeddings.shape[1] > self.dimension:
                embeddings = embeddings[:, :self.dimension]

            return embeddings

        except Exception as e:
            logger.error(f"TF-IDF encoding failed: {e}")
            raise
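
One property of the fallback above worth noting: the vectorizer is fitted on the first batch it sees, so the vocabulary - and thus the meaning of each dimension - is frozen by whichever texts arrive first, and terms unseen at fit time map to zero. A minimal sketch (illustrative, not part of the package; assumes sentence-transformers is not installed, so __init__ lands in the TF-IDF path):

engine = EmbeddingEngine(use_cache=False)

first = engine.encode(["alpha beta", "beta gamma"])  # first call fits the vocabulary
later = engine.encode(["delta epsilon"])             # reuses that fixed vocabulary

# "delta" and "epsilon" were not in the fitted vocabulary, so their row is all
# zeros (up to padding); fallback embeddings are only comparable within a
# single fitted vocabulary.
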
    def encode_batch(
        self,
        texts: List[str],
        batch_size: int = 32,
        show_progress: bool = True
    ) -> np.ndarray:
        """
        Convenience method for batch encoding with progress.

        Args:
            texts: List of texts to encode
            batch_size: Batch size for processing
            show_progress: Show progress bar

        Returns:
            Array of shape (n_texts, dimension)
        """
        return self.encode(texts, batch_size=batch_size, show_progress=show_progress)

    def similarity(self, embedding1: np.ndarray, embedding2: np.ndarray) -> float:
        """
        Compute cosine similarity between two embeddings.

        Args:
            embedding1: First embedding vector
            embedding2: Second embedding vector

        Returns:
            Similarity score in [0, 1] (higher = more similar)
        """
        # Normalize
        emb1 = embedding1 / (np.linalg.norm(embedding1) + 1e-8)
        emb2 = embedding2 / (np.linalg.norm(embedding2) + 1e-8)

        # Cosine similarity
        similarity = np.dot(emb1, emb2)

        # Clamp to [0, 1]
        return float(max(0.0, min(1.0, similarity)))
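
similarity() above is plain cosine similarity with an epsilon guard against zero vectors and a final clamp: cosine ranges over [-1, 1], and max(0.0, ...) folds anti-correlated pairs to 0. A small worked example (illustrative, not part of the package):

import numpy as np

a = np.array([1.0, 0.0])
b = np.array([1.0, 1.0])

# cos(a, b) = (a . b) / (|a| * |b|) = 1 / sqrt(2) = 0.707...
cos = np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
assert abs(cos - 1 / np.sqrt(2)) < 1e-9

# similarity(a, b) would return the same value (within the 1e-8 epsilon),
# while an anti-correlated pair such as (a, -a) clamps to 0.0
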
    def save_cache(self):
        """Save embedding cache to disk."""
        if self.cache:
            self.cache.save(self.cache_path)

    def clear_cache(self):
        """Clear embedding cache."""
        if self.cache:
            self.cache.cache.clear()
            logger.info("Cleared embedding cache")

    def get_stats(self) -> Dict[str, any]:
        """
        Get embedding engine statistics.

        Returns:
            Dictionary with engine stats
        """
        return {
            'sentence_transformers_available': SENTENCE_TRANSFORMERS_AVAILABLE,
            'use_transformers': self.use_transformers,
            'sklearn_available': SKLEARN_AVAILABLE,
            'torch_available': TORCH_AVAILABLE,
            'cuda_available': CUDA_AVAILABLE,
            'mps_available': MPS_AVAILABLE,
            'device': self.device,
            'model_name': self.model_name,
            'dimension': self.dimension,
            'cache_enabled': self.cache is not None,
            'cache_size': len(self.cache.cache) if self.cache else 0,
            'cache_max_size': CACHE_MAX_SIZE,
            'model_loaded': self.model is not None or self.tfidf_vectorizer is not None
        }

    def add_to_database(
        self,
        db_path: Path,
        embedding_column: str = 'embedding',
        batch_size: int = 32
    ):
        """
        Generate embeddings for all memories in database.

        Args:
            db_path: Path to SQLite database
            embedding_column: Column name to store embeddings
            batch_size: Batch size for processing
        """
        conn = sqlite3.connect(db_path)
        cursor = conn.cursor()

        try:
            # Check if embedding column exists
            cursor.execute("PRAGMA table_info(memories)")
            columns = {row[1] for row in cursor.fetchall()}

            if embedding_column not in columns:
                # Add column
                logger.info(f"Adding '{embedding_column}' column to database")
                cursor.execute(f'ALTER TABLE memories ADD COLUMN {embedding_column} TEXT')
                conn.commit()

            # Get memories without embeddings
            cursor.execute(f'''
                SELECT id, content, summary
                FROM memories
                WHERE {embedding_column} IS NULL OR {embedding_column} = ''
            ''')
            rows = cursor.fetchall()

            if not rows:
                logger.info("All memories already have embeddings")
                conn.close()
                return

            logger.info(f"Generating embeddings for {len(rows)} memories...")

            # Process in batches
            for i in range(0, len(rows), batch_size):
                batch = rows[i:i+batch_size]
                memory_ids = [row[0] for row in batch]

                # Combine content and summary
                texts = []
                for row in batch:
                    content = row[1] or ""
                    summary = row[2] or ""
                    text = f"{content} {summary}".strip()
                    texts.append(text)

                # Generate embeddings
                embeddings = self.encode(texts, batch_size=batch_size)

                # Store in database
                for mem_id, embedding in zip(memory_ids, embeddings):
                    embedding_json = json.dumps(embedding.tolist())
                    cursor.execute(
                        f'UPDATE memories SET {embedding_column} = ? WHERE id = ?',
                        (embedding_json, mem_id)
                    )

                conn.commit()
                logger.info(f"Processed {min(i+batch_size, len(rows))}/{len(rows)} memories")

            # Save cache
            self.save_cache()

            logger.info(f"Successfully generated embeddings for {len(rows)} memories")

        finally:
            conn.close()
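
add_to_database() above stores each vector as a JSON array in a TEXT column of the memories table. A minimal sketch of reading one back for scoring (illustrative, not part of the package; assumes the default ~/.claude-memory/memory.db layout):

import json
import sqlite3
import numpy as np
from pathlib import Path

db_path = Path.home() / ".claude-memory" / "memory.db"
conn = sqlite3.connect(db_path)
row = conn.execute(
    "SELECT id, embedding FROM memories "
    "WHERE embedding IS NOT NULL AND embedding != '' LIMIT 1"
).fetchone()
conn.close()

if row:
    mem_id, embedding_json = row
    vector = np.array(json.loads(embedding_json))  # back to a float vector, e.g. (384,)
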
# CLI interface for testing
if __name__ == "__main__":
    import sys

    # Configure logging
    logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')

    if len(sys.argv) < 2:
        print("EmbeddingEngine CLI - Local Embedding Generation")
        print("\nCommands:")
        print("  python embedding_engine.py stats        # Show engine statistics")
        print("  python embedding_engine.py generate     # Generate embeddings for database")
        print("  python embedding_engine.py test         # Run performance test")
        print("  python embedding_engine.py clear-cache  # Clear embedding cache")
        sys.exit(0)

    command = sys.argv[1]

    if command == "stats":
        engine = EmbeddingEngine()
        stats = engine.get_stats()
        print(json.dumps(stats, indent=2))

    elif command == "generate":
        db_path = MEMORY_DIR / "memory.db"
        if not db_path.exists():
            print(f"Database not found at {db_path}")
            sys.exit(1)

        print("Generating embeddings for all memories...")
        engine = EmbeddingEngine()
        engine.add_to_database(db_path)
        print("Generation complete!")
        print(json.dumps(engine.get_stats(), indent=2))

    elif command == "clear-cache":
        engine = EmbeddingEngine()
        engine.clear_cache()
        engine.save_cache()
        print("Cache cleared!")

    elif command == "test":
        print("Running embedding performance test...")

        engine = EmbeddingEngine()

        # Test single encoding
        print("\nTest 1: Single text encoding")
        text = "This is a test sentence for embedding generation."
        start = time.time()
        embedding = engine.encode(text)
        elapsed = time.time() - start
        print(f"  Time: {elapsed*1000:.2f}ms")
        print(f"  Dimension: {len(embedding)}")
        print(f"  Sample values: {embedding[:5]}")

        # Test batch encoding
        print("\nTest 2: Batch encoding (100 texts)")
        texts = [f"This is test sentence number {i} with some content." for i in range(100)]
        start = time.time()
        embeddings = engine.encode(texts, batch_size=32)
        elapsed = time.time() - start
        print(f"  Time: {elapsed*1000:.2f}ms ({100/elapsed:.0f} texts/sec)")
        print(f"  Shape: {embeddings.shape}")

        # Test cache
        print("\nTest 3: Cache performance")
        start = time.time()
        embedding_cached = engine.encode(text)
        elapsed = time.time() - start
        print(f"  Cache hit time: {elapsed*1000:.4f}ms")
        print(f"  Speedup: {(elapsed*1000):.0f}x faster")

        # Test similarity
        print("\nTest 4: Similarity computation")
        text1 = "The weather is nice today."
        text2 = "It's a beautiful day outside."
        text3 = "Python is a programming language."

        emb1 = engine.encode(text1)
        emb2 = engine.encode(text2)
        emb3 = engine.encode(text3)

        sim_12 = engine.similarity(emb1, emb2)
        sim_13 = engine.similarity(emb1, emb3)

        print(f"  Similarity (weather vs beautiful day): {sim_12:.3f}")
        print(f"  Similarity (weather vs programming): {sim_13:.3f}")

        # Print stats
        print("\nEngine statistics:")
        print(json.dumps(engine.get_stats(), indent=2))

        # Save cache
        engine.save_cache()
        print("\nCache saved!")

    else:
        print(f"Unknown command: {command}")
        print("Run without arguments to see available commands.")
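
Taken together, a plausible end-to-end use of this module (a sketch under the defaults above; the import path is an assumption, not documented package API):

from embedding_engine import EmbeddingEngine  # assumes package/src is on sys.path

engine = EmbeddingEngine()            # loads all-MiniLM-L6-v2, picks cuda/mps/cpu
emb1 = engine.encode("The weather is nice today.")
emb2 = engine.encode("It's a beautiful day outside.")
emb3 = engine.encode("Python is a programming language.")

print(engine.similarity(emb1, emb2))  # semantically close pair: relatively high
print(engine.similarity(emb1, emb3))  # unrelated pair: lower

engine.save_cache()                   # persists the LRU cache to embedding_cache.json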