superlocalmemory 3.0.31 → 3.0.33
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/pyproject.toml +1 -1
- package/src/superlocalmemory/core/config.py +19 -4
- package/src/superlocalmemory/core/embeddings.py +37 -3
- package/src/superlocalmemory/core/engine.py +71 -6
- package/src/superlocalmemory/core/ollama_embedder.py +179 -0
- package/src/superlocalmemory/encoding/type_router.py +2 -0
- package/src/superlocalmemory/storage/database.py +49 -16
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "superlocalmemory",
|
|
3
|
-
"version": "3.0.
|
|
3
|
+
"version": "3.0.33",
|
|
4
4
|
"description": "Information-geometric agent memory with mathematical guarantees. 4-channel retrieval, Fisher-Rao similarity, zero-LLM mode, EU AI Act compliant. Works with Claude, Cursor, Windsurf, and 17+ AI tools.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"ai-memory",
|
package/pyproject.toml
CHANGED
|
@@ -37,6 +37,11 @@ class EmbeddingConfig:
|
|
|
37
37
|
|
|
38
38
|
model_name: str = "nomic-ai/nomic-embed-text-v1.5"
|
|
39
39
|
dimension: int = 768
|
|
40
|
+
# Provider: "" = auto-detect, "sentence-transformers", "ollama", "cloud"
|
|
41
|
+
provider: str = ""
|
|
42
|
+
# Ollama settings (used when provider="ollama" or auto-detected)
|
|
43
|
+
ollama_model: str = "nomic-embed-text"
|
|
44
|
+
ollama_base_url: str = "http://localhost:11434"
|
|
40
45
|
# Azure / cloud settings (Mode C only)
|
|
41
46
|
api_endpoint: str = ""
|
|
42
47
|
api_key: str = ""
|
|
@@ -45,7 +50,11 @@ class EmbeddingConfig:
|
|
|
45
50
|
|
|
46
51
|
@property
|
|
47
52
|
def is_cloud(self) -> bool:
|
|
48
|
-
return bool(self.api_endpoint)
|
|
53
|
+
return bool(self.api_endpoint) or self.provider == "cloud"
|
|
54
|
+
|
|
55
|
+
@property
|
|
56
|
+
def is_ollama(self) -> bool:
|
|
57
|
+
return self.provider == "ollama"
|
|
49
58
|
|
|
50
59
|
|
|
51
60
|
# ---------------------------------------------------------------------------
|
|
@@ -235,15 +244,17 @@ class SLMConfig:
|
|
|
235
244
|
data = json.loads(path.read_text())
|
|
236
245
|
mode = Mode(data.get("mode", "a"))
|
|
237
246
|
llm_data = data.get("llm", {})
|
|
247
|
+
emb_data = data.get("embedding", {})
|
|
238
248
|
config = cls.for_mode(
|
|
239
249
|
mode,
|
|
240
250
|
llm_provider=llm_data.get("provider", ""),
|
|
241
251
|
llm_model=llm_data.get("model", ""),
|
|
242
252
|
llm_api_key=llm_data.get("api_key", ""),
|
|
243
253
|
llm_api_base=llm_data.get("base_url", ""),
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
254
|
+
embedding_provider=emb_data.get("provider", ""),
|
|
255
|
+
embedding_endpoint=emb_data.get("api_endpoint", ""),
|
|
256
|
+
embedding_key=emb_data.get("api_key", ""),
|
|
257
|
+
embedding_deployment=emb_data.get("deployment_name", ""),
|
|
247
258
|
)
|
|
248
259
|
config.active_profile = data.get("active_profile", "default")
|
|
249
260
|
return config
|
|
@@ -265,6 +276,7 @@ class SLMConfig:
|
|
|
265
276
|
"embedding": {
|
|
266
277
|
"model_name": self.embedding.model_name,
|
|
267
278
|
"dimension": self.embedding.dimension,
|
|
279
|
+
"provider": self.embedding.provider,
|
|
268
280
|
"api_endpoint": self.embedding.api_endpoint,
|
|
269
281
|
"api_key": self.embedding.api_key,
|
|
270
282
|
"deployment_name": self.embedding.deployment_name,
|
|
@@ -317,6 +329,7 @@ class SLMConfig:
|
|
|
317
329
|
llm_model: str = "",
|
|
318
330
|
llm_api_key: str = "",
|
|
319
331
|
llm_api_base: str = "",
|
|
332
|
+
embedding_provider: str = "",
|
|
320
333
|
embedding_endpoint: str = "",
|
|
321
334
|
embedding_key: str = "",
|
|
322
335
|
embedding_deployment: str = "",
|
|
@@ -331,6 +344,7 @@ class SLMConfig:
|
|
|
331
344
|
embedding=EmbeddingConfig(
|
|
332
345
|
model_name="nomic-ai/nomic-embed-text-v1.5",
|
|
333
346
|
dimension=768,
|
|
347
|
+
provider=embedding_provider,
|
|
334
348
|
),
|
|
335
349
|
llm=LLMConfig(), # No LLM
|
|
336
350
|
retrieval=RetrievalConfig(
|
|
@@ -348,6 +362,7 @@ class SLMConfig:
|
|
|
348
362
|
embedding=EmbeddingConfig(
|
|
349
363
|
model_name="nomic-ai/nomic-embed-text-v1.5",
|
|
350
364
|
dimension=768,
|
|
365
|
+
provider=embedding_provider,
|
|
351
366
|
),
|
|
352
367
|
llm=LLMConfig(
|
|
353
368
|
provider=llm_provider or "ollama",
|
|
@@ -18,6 +18,7 @@ from __future__ import annotations
|
|
|
18
18
|
import json
|
|
19
19
|
import logging
|
|
20
20
|
import os
|
|
21
|
+
import select
|
|
21
22
|
import subprocess
|
|
22
23
|
import sys
|
|
23
24
|
import threading
|
|
@@ -45,6 +46,7 @@ class DimensionMismatchError(RuntimeError):
|
|
|
45
46
|
|
|
46
47
|
|
|
47
48
|
_IDLE_TIMEOUT_SECONDS = 120 # 2 minutes — kill worker after idle
|
|
49
|
+
_SUBPROCESS_RESPONSE_TIMEOUT = 60 # seconds — max wait for worker response
|
|
48
50
|
|
|
49
51
|
|
|
50
52
|
class EmbeddingService:
|
|
@@ -137,7 +139,11 @@ class EmbeddingService:
|
|
|
137
139
|
# ------------------------------------------------------------------
|
|
138
140
|
|
|
139
141
|
def _subprocess_embed(self, texts: list[str]) -> list[list[float]] | None:
|
|
140
|
-
"""Send texts to worker subprocess, get embeddings back.
|
|
142
|
+
"""Send texts to worker subprocess, get embeddings back.
|
|
143
|
+
|
|
144
|
+
Includes a timeout (_SUBPROCESS_RESPONSE_TIMEOUT seconds) so the CLI
|
|
145
|
+
never hangs indefinitely on cold model loads or network issues.
|
|
146
|
+
"""
|
|
141
147
|
with self._lock:
|
|
142
148
|
self._ensure_worker()
|
|
143
149
|
if self._worker_proc is None:
|
|
@@ -153,9 +159,12 @@ class EmbeddingService:
|
|
|
153
159
|
try:
|
|
154
160
|
self._worker_proc.stdin.write(req)
|
|
155
161
|
self._worker_proc.stdin.flush()
|
|
156
|
-
resp_line = self.
|
|
162
|
+
resp_line = self._readline_with_timeout(
|
|
163
|
+
self._worker_proc.stdout,
|
|
164
|
+
_SUBPROCESS_RESPONSE_TIMEOUT,
|
|
165
|
+
)
|
|
157
166
|
if not resp_line:
|
|
158
|
-
logger.warning("Worker returned empty
|
|
167
|
+
logger.warning("Worker returned empty or timed out, restarting")
|
|
159
168
|
self._kill_worker()
|
|
160
169
|
return None
|
|
161
170
|
resp = json.loads(resp_line)
|
|
@@ -169,6 +178,31 @@ class EmbeddingService:
|
|
|
169
178
|
self._kill_worker()
|
|
170
179
|
return None
|
|
171
180
|
|
|
181
|
+
@staticmethod
|
|
182
|
+
def _readline_with_timeout(stream, timeout_seconds: float) -> str:
|
|
183
|
+
"""Read a line from stream with a timeout. Returns '' on timeout."""
|
|
184
|
+
result_container: list[str] = []
|
|
185
|
+
error_container: list[Exception] = []
|
|
186
|
+
|
|
187
|
+
def _read() -> None:
|
|
188
|
+
try:
|
|
189
|
+
result_container.append(stream.readline())
|
|
190
|
+
except Exception as exc:
|
|
191
|
+
error_container.append(exc)
|
|
192
|
+
|
|
193
|
+
reader = threading.Thread(target=_read, daemon=True)
|
|
194
|
+
reader.start()
|
|
195
|
+
reader.join(timeout=timeout_seconds)
|
|
196
|
+
|
|
197
|
+
if reader.is_alive():
|
|
198
|
+
logger.warning(
|
|
199
|
+
"Embedding worker did not respond within %ds", timeout_seconds,
|
|
200
|
+
)
|
|
201
|
+
return ""
|
|
202
|
+
if error_container:
|
|
203
|
+
raise error_container[0]
|
|
204
|
+
return result_container[0] if result_container else ""
|
|
205
|
+
|
|
172
206
|
def _ensure_worker(self) -> None:
|
|
173
207
|
"""Spawn worker subprocess if not running."""
|
|
174
208
|
if self._worker_proc is not None and self._worker_proc.poll() is None:
|
|
@@ -81,12 +81,7 @@ class MemoryEngine:
|
|
|
81
81
|
|
|
82
82
|
self._db = DatabaseManager(self._config.db_path)
|
|
83
83
|
self._db.initialize(schema)
|
|
84
|
-
|
|
85
|
-
emb = EmbeddingService(self._config.embedding)
|
|
86
|
-
self._embedder = emb if emb.is_available else None
|
|
87
|
-
except Exception as exc:
|
|
88
|
-
logger.warning("Embeddings unavailable (%s). BM25-only mode.", exc)
|
|
89
|
-
self._embedder = None
|
|
84
|
+
self._embedder = self._init_embedder()
|
|
90
85
|
|
|
91
86
|
if self._caps.llm_fact_extraction:
|
|
92
87
|
self._llm = LLMBackbone(self._config.llm)
|
|
@@ -115,6 +110,76 @@ class MemoryEngine:
|
|
|
115
110
|
logger.info("MemoryEngine initialized: mode=%s profile=%s",
|
|
116
111
|
self._config.mode.value, self._profile_id)
|
|
117
112
|
|
|
113
|
+
def _init_embedder(self):
|
|
114
|
+
"""Initialize the best available embedding provider.
|
|
115
|
+
|
|
116
|
+
Priority order:
|
|
117
|
+
1. Explicit provider in config (ollama / cloud / sentence-transformers)
|
|
118
|
+
2. Auto-detect: if LLM provider=ollama and Ollama has embedding model → use it
|
|
119
|
+
3. Fallback to sentence-transformers subprocess
|
|
120
|
+
4. If nothing works → None (BM25-only mode)
|
|
121
|
+
"""
|
|
122
|
+
from superlocalmemory.core.embeddings import EmbeddingService
|
|
123
|
+
|
|
124
|
+
emb_cfg = self._config.embedding
|
|
125
|
+
provider = emb_cfg.provider
|
|
126
|
+
|
|
127
|
+
# --- Explicit ollama provider ---
|
|
128
|
+
if provider == "ollama":
|
|
129
|
+
return self._try_ollama_embedder(emb_cfg)
|
|
130
|
+
|
|
131
|
+
# --- Explicit cloud provider ---
|
|
132
|
+
if provider == "cloud" or emb_cfg.is_cloud:
|
|
133
|
+
return self._try_service_embedder(EmbeddingService, emb_cfg)
|
|
134
|
+
|
|
135
|
+
# --- Explicit sentence-transformers ---
|
|
136
|
+
if provider == "sentence-transformers":
|
|
137
|
+
return self._try_service_embedder(EmbeddingService, emb_cfg)
|
|
138
|
+
|
|
139
|
+
# --- Auto-detect: try Ollama first (fast path, <1s) ---
|
|
140
|
+
# Check regardless of LLM provider — if Ollama is running and has
|
|
141
|
+
# the embedding model, use it. This avoids the 30s cold start of
|
|
142
|
+
# sentence-transformers subprocess.
|
|
143
|
+
ollama_emb = self._try_ollama_embedder(emb_cfg)
|
|
144
|
+
if ollama_emb is not None:
|
|
145
|
+
logger.info("Auto-detected Ollama embeddings (fast path)")
|
|
146
|
+
return ollama_emb
|
|
147
|
+
|
|
148
|
+
# --- Fallback: sentence-transformers subprocess ---
|
|
149
|
+
return self._try_service_embedder(EmbeddingService, emb_cfg)
|
|
150
|
+
|
|
151
|
+
def _try_ollama_embedder(self, emb_cfg):
|
|
152
|
+
"""Try to create an OllamaEmbedder. Returns it or None."""
|
|
153
|
+
try:
|
|
154
|
+
from superlocalmemory.core.ollama_embedder import OllamaEmbedder
|
|
155
|
+
emb = OllamaEmbedder(
|
|
156
|
+
model=emb_cfg.ollama_model,
|
|
157
|
+
base_url=emb_cfg.ollama_base_url,
|
|
158
|
+
dimension=emb_cfg.dimension,
|
|
159
|
+
)
|
|
160
|
+
if emb.is_available:
|
|
161
|
+
logger.info("Using Ollama embeddings (%s)", emb_cfg.ollama_model)
|
|
162
|
+
return emb
|
|
163
|
+
logger.warning(
|
|
164
|
+
"Ollama embedder not available (model=%s). Falling back.",
|
|
165
|
+
emb_cfg.ollama_model,
|
|
166
|
+
)
|
|
167
|
+
except Exception as exc:
|
|
168
|
+
logger.warning("OllamaEmbedder init failed: %s", exc)
|
|
169
|
+
return None
|
|
170
|
+
|
|
171
|
+
@staticmethod
|
|
172
|
+
def _try_service_embedder(cls, emb_cfg):
|
|
173
|
+
"""Try to create an EmbeddingService. Returns it or None."""
|
|
174
|
+
try:
|
|
175
|
+
emb = cls(emb_cfg)
|
|
176
|
+
if emb.is_available:
|
|
177
|
+
return emb
|
|
178
|
+
logger.warning("EmbeddingService not available. BM25-only mode.")
|
|
179
|
+
except Exception as exc:
|
|
180
|
+
logger.warning("Embeddings unavailable (%s). BM25-only mode.", exc)
|
|
181
|
+
return None
|
|
182
|
+
|
|
118
183
|
def store(
|
|
119
184
|
self,
|
|
120
185
|
content: str,
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under the MIT License - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
|
|
4
|
+
|
|
5
|
+
"""Ollama Embedding Provider — lightweight HTTP-based embeddings.
|
|
6
|
+
|
|
7
|
+
Uses Ollama's /api/embed endpoint for fast local embeddings without
|
|
8
|
+
loading PyTorch or sentence-transformers into the process.
|
|
9
|
+
|
|
10
|
+
Typical latency: <1 second (vs 30s cold start for sentence-transformers).
|
|
11
|
+
Memory: ~0 MB in the SLM process (Ollama manages its own memory).
|
|
12
|
+
|
|
13
|
+
Part of Qualixar | Author: Varun Pratap Bhardwaj
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import logging
|
|
19
|
+
from typing import TYPE_CHECKING
|
|
20
|
+
|
|
21
|
+
import numpy as np
|
|
22
|
+
|
|
23
|
+
if TYPE_CHECKING:
|
|
24
|
+
pass
|
|
25
|
+
|
|
26
|
+
logger = logging.getLogger(__name__)
|
|
27
|
+
|
|
28
|
+
# Fisher variance constants (must match EmbeddingService)
|
|
29
|
+
_FISHER_VAR_MIN = 0.05
|
|
30
|
+
_FISHER_VAR_MAX = 2.0
|
|
31
|
+
_FISHER_VAR_RANGE = _FISHER_VAR_MAX - _FISHER_VAR_MIN
|
|
32
|
+
|
|
33
|
+
# Ollama connect/response timeouts
|
|
34
|
+
_CONNECT_TIMEOUT = 5.0
|
|
35
|
+
_RESPONSE_TIMEOUT = 30.0
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class OllamaEmbedder:
|
|
39
|
+
"""Embedding service backed by a local Ollama instance.
|
|
40
|
+
|
|
41
|
+
Drop-in replacement for EmbeddingService. Implements the same
|
|
42
|
+
public interface (embed, embed_batch, compute_fisher_params,
|
|
43
|
+
is_available, dimension) so the engine can swap transparently.
|
|
44
|
+
"""
|
|
45
|
+
|
|
46
|
+
def __init__(
|
|
47
|
+
self,
|
|
48
|
+
model: str = "nomic-embed-text",
|
|
49
|
+
base_url: str = "http://localhost:11434",
|
|
50
|
+
dimension: int = 768,
|
|
51
|
+
) -> None:
|
|
52
|
+
self._model = model
|
|
53
|
+
self._base_url = base_url.rstrip("/")
|
|
54
|
+
self._dimension = dimension
|
|
55
|
+
self._available: bool | None = None # lazy-checked
|
|
56
|
+
|
|
57
|
+
# ------------------------------------------------------------------
|
|
58
|
+
# Public interface (matches EmbeddingService)
|
|
59
|
+
# ------------------------------------------------------------------
|
|
60
|
+
|
|
61
|
+
@property
|
|
62
|
+
def is_available(self) -> bool:
|
|
63
|
+
"""Check if Ollama is reachable and the model is pulled."""
|
|
64
|
+
if self._available is not None:
|
|
65
|
+
return self._available
|
|
66
|
+
self._available = self._check_availability()
|
|
67
|
+
return self._available
|
|
68
|
+
|
|
69
|
+
@property
|
|
70
|
+
def dimension(self) -> int:
|
|
71
|
+
return self._dimension
|
|
72
|
+
|
|
73
|
+
def embed(self, text: str) -> list[float] | None:
|
|
74
|
+
"""Embed a single text. Returns normalized vector or None on failure."""
|
|
75
|
+
if not text or not text.strip():
|
|
76
|
+
raise ValueError("Cannot embed empty text")
|
|
77
|
+
try:
|
|
78
|
+
return self._call_ollama_embed(text)
|
|
79
|
+
except Exception as exc:
|
|
80
|
+
logger.warning("Ollama embed failed: %s", exc)
|
|
81
|
+
return None
|
|
82
|
+
|
|
83
|
+
def embed_batch(self, texts: list[str]) -> list[list[float] | None]:
|
|
84
|
+
"""Embed a batch of texts. Uses the batch API when available."""
|
|
85
|
+
if not texts:
|
|
86
|
+
raise ValueError("Cannot embed empty batch")
|
|
87
|
+
try:
|
|
88
|
+
return self._call_ollama_embed_batch(texts)
|
|
89
|
+
except Exception as exc:
|
|
90
|
+
logger.warning("Ollama batch embed failed: %s", exc)
|
|
91
|
+
return [None] * len(texts)
|
|
92
|
+
|
|
93
|
+
def compute_fisher_params(
|
|
94
|
+
self, embedding: list[float],
|
|
95
|
+
) -> tuple[list[float], list[float]]:
|
|
96
|
+
"""Compute Fisher-Rao parameters from a raw embedding."""
|
|
97
|
+
arr = np.asarray(embedding, dtype=np.float64)
|
|
98
|
+
norm = float(np.linalg.norm(arr))
|
|
99
|
+
if norm < 1e-10:
|
|
100
|
+
mean = np.zeros(len(arr), dtype=np.float64)
|
|
101
|
+
variance = np.full(len(arr), _FISHER_VAR_MAX, dtype=np.float64)
|
|
102
|
+
return mean.tolist(), variance.tolist()
|
|
103
|
+
mean = arr / norm
|
|
104
|
+
abs_mean = np.abs(mean)
|
|
105
|
+
max_val = float(np.max(abs_mean)) + 1e-10
|
|
106
|
+
signal_strength = abs_mean / max_val
|
|
107
|
+
variance = _FISHER_VAR_MAX - _FISHER_VAR_RANGE * signal_strength
|
|
108
|
+
variance = np.clip(variance, _FISHER_VAR_MIN, _FISHER_VAR_MAX)
|
|
109
|
+
return mean.tolist(), variance.tolist()
|
|
110
|
+
|
|
111
|
+
def unload(self) -> None:
|
|
112
|
+
"""No-op for Ollama (Ollama manages its own model lifecycle)."""
|
|
113
|
+
|
|
114
|
+
# ------------------------------------------------------------------
|
|
115
|
+
# Internal
|
|
116
|
+
# ------------------------------------------------------------------
|
|
117
|
+
|
|
118
|
+
def _check_availability(self) -> bool:
|
|
119
|
+
"""Verify Ollama is running and has the embedding model."""
|
|
120
|
+
import httpx
|
|
121
|
+
|
|
122
|
+
try:
|
|
123
|
+
resp = httpx.get(
|
|
124
|
+
f"{self._base_url}/api/tags",
|
|
125
|
+
timeout=_CONNECT_TIMEOUT,
|
|
126
|
+
)
|
|
127
|
+
if resp.status_code != 200:
|
|
128
|
+
return False
|
|
129
|
+
models = resp.json().get("models", [])
|
|
130
|
+
model_names = [m.get("name", "").split(":")[0] for m in models]
|
|
131
|
+
model_base = self._model.split(":")[0]
|
|
132
|
+
if model_base not in model_names:
|
|
133
|
+
logger.info(
|
|
134
|
+
"Ollama running but model '%s' not found (have: %s)",
|
|
135
|
+
self._model, ", ".join(model_names),
|
|
136
|
+
)
|
|
137
|
+
return False
|
|
138
|
+
return True
|
|
139
|
+
except Exception as exc:
|
|
140
|
+
logger.debug("Ollama not reachable: %s", exc)
|
|
141
|
+
return False
|
|
142
|
+
|
|
143
|
+
def _call_ollama_embed(self, text: str) -> list[float]:
|
|
144
|
+
"""Call Ollama embed endpoint for a single text."""
|
|
145
|
+
import httpx
|
|
146
|
+
|
|
147
|
+
resp = httpx.post(
|
|
148
|
+
f"{self._base_url}/api/embed",
|
|
149
|
+
json={"model": self._model, "input": [text]},
|
|
150
|
+
timeout=httpx.Timeout(_RESPONSE_TIMEOUT, connect=_CONNECT_TIMEOUT),
|
|
151
|
+
)
|
|
152
|
+
resp.raise_for_status()
|
|
153
|
+
data = resp.json()
|
|
154
|
+
# Ollama /api/embed returns {"embeddings": [[...]]}
|
|
155
|
+
vec = data["embeddings"][0]
|
|
156
|
+
return self._normalize(vec)
|
|
157
|
+
|
|
158
|
+
def _call_ollama_embed_batch(self, texts: list[str]) -> list[list[float] | None]:
|
|
159
|
+
"""Call Ollama embed endpoint with batch input."""
|
|
160
|
+
import httpx
|
|
161
|
+
|
|
162
|
+
resp = httpx.post(
|
|
163
|
+
f"{self._base_url}/api/embed",
|
|
164
|
+
json={"model": self._model, "input": texts},
|
|
165
|
+
timeout=httpx.Timeout(_RESPONSE_TIMEOUT, connect=_CONNECT_TIMEOUT),
|
|
166
|
+
)
|
|
167
|
+
resp.raise_for_status()
|
|
168
|
+
data = resp.json()
|
|
169
|
+
vectors = data.get("embeddings", [])
|
|
170
|
+
return [self._normalize(v) for v in vectors]
|
|
171
|
+
|
|
172
|
+
@staticmethod
|
|
173
|
+
def _normalize(vec: list[float]) -> list[float]:
|
|
174
|
+
"""L2-normalize embedding vector."""
|
|
175
|
+
arr = np.asarray(vec, dtype=np.float32)
|
|
176
|
+
norm = float(np.linalg.norm(arr))
|
|
177
|
+
if norm > 1e-10:
|
|
178
|
+
arr = arr / norm
|
|
179
|
+
return arr.tolist()
|
|
@@ -5,13 +5,14 @@
|
|
|
5
5
|
"""SuperLocalMemory V3 — Database Manager.
|
|
6
6
|
|
|
7
7
|
SQLite with WAL, profile-scoped CRUD, FTS5 search, BM25 persistence.
|
|
8
|
-
|
|
8
|
+
Concurrent-safe: WAL mode + busy_timeout + retry on SQLITE_BUSY.
|
|
9
|
+
Multiple processes (MCP, CLI, integrations) can read/write safely.
|
|
9
10
|
|
|
10
11
|
Part of Qualixar | Author: Varun Pratap Bhardwaj
|
|
11
12
|
"""
|
|
12
13
|
from __future__ import annotations
|
|
13
14
|
|
|
14
|
-
import json, logging, sqlite3, threading
|
|
15
|
+
import json, logging, sqlite3, threading, time
|
|
15
16
|
from contextlib import contextmanager
|
|
16
17
|
from pathlib import Path
|
|
17
18
|
from types import ModuleType
|
|
@@ -37,11 +38,22 @@ def _jd(val: Any) -> str | None:
|
|
|
37
38
|
return json.dumps(val) if val is not None else None
|
|
38
39
|
|
|
39
40
|
|
|
41
|
+
_BUSY_TIMEOUT_MS = 10_000 # 10 seconds — wait for other writers
|
|
42
|
+
_MAX_RETRIES = 5 # retry on transient SQLITE_BUSY
|
|
43
|
+
_RETRY_BASE_DELAY = 0.1 # seconds — exponential backoff base
|
|
44
|
+
|
|
45
|
+
|
|
40
46
|
class DatabaseManager:
|
|
41
|
-
"""
|
|
47
|
+
"""Concurrent-safe SQLite manager with WAL, profile isolation, and FTS5.
|
|
48
|
+
|
|
49
|
+
Designed for multi-process access: MCP server, CLI, LangChain, CrewAI,
|
|
50
|
+
and other integrations can all read/write the same database safely.
|
|
42
51
|
|
|
43
|
-
|
|
44
|
-
|
|
52
|
+
Concurrency model:
|
|
53
|
+
- WAL mode: readers never block writers, writers never block readers
|
|
54
|
+
- busy_timeout: writers wait up to 10s for other writers instead of failing
|
|
55
|
+
- Retry with backoff: transient SQLITE_BUSY errors are retried automatically
|
|
56
|
+
- Per-call connections: no shared state between processes
|
|
45
57
|
"""
|
|
46
58
|
|
|
47
59
|
def __init__(self, db_path: str | Path) -> None:
|
|
@@ -55,6 +67,7 @@ class DatabaseManager:
|
|
|
55
67
|
conn = sqlite3.connect(str(self.db_path))
|
|
56
68
|
try:
|
|
57
69
|
conn.execute("PRAGMA journal_mode=WAL")
|
|
70
|
+
conn.execute(f"PRAGMA busy_timeout={_BUSY_TIMEOUT_MS}")
|
|
58
71
|
conn.execute("PRAGMA foreign_keys=ON")
|
|
59
72
|
conn.commit()
|
|
60
73
|
finally:
|
|
@@ -62,9 +75,8 @@ class DatabaseManager:
|
|
|
62
75
|
|
|
63
76
|
def initialize(self, schema_module: ModuleType) -> None:
|
|
64
77
|
"""Create all tables. *schema_module* must expose ``create_all_tables(conn)``."""
|
|
65
|
-
conn =
|
|
78
|
+
conn = self._connect()
|
|
66
79
|
try:
|
|
67
|
-
conn.execute("PRAGMA foreign_keys=ON")
|
|
68
80
|
schema_module.create_all_tables(conn)
|
|
69
81
|
conn.commit()
|
|
70
82
|
logger.info("Schema initialized at %s", self.db_path)
|
|
@@ -81,8 +93,9 @@ class DatabaseManager:
|
|
|
81
93
|
self.close()
|
|
82
94
|
|
|
83
95
|
def _connect(self) -> sqlite3.Connection:
|
|
84
|
-
conn = sqlite3.connect(str(self.db_path))
|
|
96
|
+
conn = sqlite3.connect(str(self.db_path), timeout=_BUSY_TIMEOUT_MS / 1000)
|
|
85
97
|
conn.row_factory = sqlite3.Row
|
|
98
|
+
conn.execute(f"PRAGMA busy_timeout={_BUSY_TIMEOUT_MS}")
|
|
86
99
|
conn.execute("PRAGMA foreign_keys=ON")
|
|
87
100
|
return conn
|
|
88
101
|
|
|
@@ -103,16 +116,36 @@ class DatabaseManager:
|
|
|
103
116
|
conn.close()
|
|
104
117
|
|
|
105
118
|
def execute(self, sql: str, params: tuple[Any, ...] = ()) -> list[sqlite3.Row]:
|
|
106
|
-
"""Execute SQL
|
|
119
|
+
"""Execute SQL with automatic retry on SQLITE_BUSY.
|
|
120
|
+
|
|
121
|
+
Uses shared conn inside transaction, else per-call with retry.
|
|
122
|
+
"""
|
|
107
123
|
if self._txn_conn is not None:
|
|
108
124
|
return self._txn_conn.execute(sql, params).fetchall()
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
conn.
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
125
|
+
|
|
126
|
+
last_error: Exception | None = None
|
|
127
|
+
for attempt in range(_MAX_RETRIES):
|
|
128
|
+
conn = self._connect()
|
|
129
|
+
try:
|
|
130
|
+
rows = conn.execute(sql, params).fetchall()
|
|
131
|
+
conn.commit()
|
|
132
|
+
return rows
|
|
133
|
+
except sqlite3.OperationalError as exc:
|
|
134
|
+
last_error = exc
|
|
135
|
+
if "locked" in str(exc).lower() or "busy" in str(exc).lower():
|
|
136
|
+
delay = _RETRY_BASE_DELAY * (2 ** attempt)
|
|
137
|
+
logger.debug(
|
|
138
|
+
"DB busy (attempt %d/%d), retrying in %.1fs: %s",
|
|
139
|
+
attempt + 1, _MAX_RETRIES, delay, exc,
|
|
140
|
+
)
|
|
141
|
+
time.sleep(delay)
|
|
142
|
+
continue
|
|
143
|
+
raise
|
|
144
|
+
finally:
|
|
145
|
+
conn.close()
|
|
146
|
+
|
|
147
|
+
logger.warning("DB operation failed after %d retries: %s", _MAX_RETRIES, last_error)
|
|
148
|
+
raise last_error # type: ignore[misc]
|
|
116
149
|
|
|
117
150
|
def store_memory(self, record: MemoryRecord) -> str:
|
|
118
151
|
"""Persist a raw memory record. Returns memory_id."""
|