superlocalmemory 3.0.30 → 3.0.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -16,12 +16,13 @@ SuperLocalMemory V3 - Intelligent local memory system for AI coding assistants.
16
16
 
17
17
  ---
18
18
 
19
- ## [3.0.30] - 2026-03-21
19
+ ## [3.0.31] - 2026-03-21
20
20
 
21
21
  ### Fixed
22
22
  - Profile switching and display uses correct identifiers
23
23
  - Profile sync across CLI, Dashboard, and MCP — all entry points now see the same profiles
24
24
  - Profile switching now persists correctly across restarts
25
+ - Resolve circular import in server module loading
25
26
 
26
27
  ---
27
28
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "superlocalmemory",
3
- "version": "3.0.30",
3
+ "version": "3.0.32",
4
4
  "description": "Information-geometric agent memory with mathematical guarantees. 4-channel retrieval, Fisher-Rao similarity, zero-LLM mode, EU AI Act compliant. Works with Claude, Cursor, Windsurf, and 17+ AI tools.",
5
5
  "keywords": [
6
6
  "ai-memory",
package/pyproject.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "superlocalmemory"
3
- version = "3.0.30"
3
+ version = "3.0.32"
4
4
  description = "Information-geometric agent memory with mathematical guarantees"
5
5
  readme = "README.md"
6
6
  license = {text = "MIT"}
@@ -37,6 +37,11 @@ class EmbeddingConfig:
37
37
 
38
38
  model_name: str = "nomic-ai/nomic-embed-text-v1.5"
39
39
  dimension: int = 768
40
+ # Provider: "" = auto-detect, "sentence-transformers", "ollama", "cloud"
41
+ provider: str = ""
42
+ # Ollama settings (used when provider="ollama" or auto-detected)
43
+ ollama_model: str = "nomic-embed-text"
44
+ ollama_base_url: str = "http://localhost:11434"
40
45
  # Azure / cloud settings (Mode C only)
41
46
  api_endpoint: str = ""
42
47
  api_key: str = ""
@@ -45,7 +50,11 @@ class EmbeddingConfig:
45
50
 
46
51
  @property
47
52
  def is_cloud(self) -> bool:
48
- return bool(self.api_endpoint)
53
+ return bool(self.api_endpoint) or self.provider == "cloud"
54
+
55
+ @property
56
+ def is_ollama(self) -> bool:
57
+ return self.provider == "ollama"
49
58
 
50
59
 
51
60
  # ---------------------------------------------------------------------------
@@ -235,15 +244,17 @@ class SLMConfig:
235
244
  data = json.loads(path.read_text())
236
245
  mode = Mode(data.get("mode", "a"))
237
246
  llm_data = data.get("llm", {})
247
+ emb_data = data.get("embedding", {})
238
248
  config = cls.for_mode(
239
249
  mode,
240
250
  llm_provider=llm_data.get("provider", ""),
241
251
  llm_model=llm_data.get("model", ""),
242
252
  llm_api_key=llm_data.get("api_key", ""),
243
253
  llm_api_base=llm_data.get("base_url", ""),
244
- embedding_endpoint=data.get("embedding", {}).get("api_endpoint", ""),
245
- embedding_key=data.get("embedding", {}).get("api_key", ""),
246
- embedding_deployment=data.get("embedding", {}).get("deployment_name", ""),
254
+ embedding_provider=emb_data.get("provider", ""),
255
+ embedding_endpoint=emb_data.get("api_endpoint", ""),
256
+ embedding_key=emb_data.get("api_key", ""),
257
+ embedding_deployment=emb_data.get("deployment_name", ""),
247
258
  )
248
259
  config.active_profile = data.get("active_profile", "default")
249
260
  return config
@@ -265,6 +276,7 @@ class SLMConfig:
265
276
  "embedding": {
266
277
  "model_name": self.embedding.model_name,
267
278
  "dimension": self.embedding.dimension,
279
+ "provider": self.embedding.provider,
268
280
  "api_endpoint": self.embedding.api_endpoint,
269
281
  "api_key": self.embedding.api_key,
270
282
  "deployment_name": self.embedding.deployment_name,
@@ -317,6 +329,7 @@ class SLMConfig:
317
329
  llm_model: str = "",
318
330
  llm_api_key: str = "",
319
331
  llm_api_base: str = "",
332
+ embedding_provider: str = "",
320
333
  embedding_endpoint: str = "",
321
334
  embedding_key: str = "",
322
335
  embedding_deployment: str = "",
@@ -331,6 +344,7 @@ class SLMConfig:
331
344
  embedding=EmbeddingConfig(
332
345
  model_name="nomic-ai/nomic-embed-text-v1.5",
333
346
  dimension=768,
347
+ provider=embedding_provider,
334
348
  ),
335
349
  llm=LLMConfig(), # No LLM
336
350
  retrieval=RetrievalConfig(
@@ -348,6 +362,7 @@ class SLMConfig:
348
362
  embedding=EmbeddingConfig(
349
363
  model_name="nomic-ai/nomic-embed-text-v1.5",
350
364
  dimension=768,
365
+ provider=embedding_provider,
351
366
  ),
352
367
  llm=LLMConfig(
353
368
  provider=llm_provider or "ollama",
@@ -18,6 +18,7 @@ from __future__ import annotations
18
18
  import json
19
19
  import logging
20
20
  import os
21
+ import select
21
22
  import subprocess
22
23
  import sys
23
24
  import threading
@@ -45,6 +46,7 @@ class DimensionMismatchError(RuntimeError):
45
46
 
46
47
 
47
48
  _IDLE_TIMEOUT_SECONDS = 120 # 2 minutes — kill worker after idle
49
+ _SUBPROCESS_RESPONSE_TIMEOUT = 60 # seconds — max wait for worker response
48
50
 
49
51
 
50
52
  class EmbeddingService:
@@ -137,7 +139,11 @@ class EmbeddingService:
137
139
  # ------------------------------------------------------------------
138
140
 
139
141
  def _subprocess_embed(self, texts: list[str]) -> list[list[float]] | None:
140
- """Send texts to worker subprocess, get embeddings back."""
142
+ """Send texts to worker subprocess, get embeddings back.
143
+
144
+ Includes a timeout (_SUBPROCESS_RESPONSE_TIMEOUT seconds) so the CLI
145
+ never hangs indefinitely on cold model loads or network issues.
146
+ """
141
147
  with self._lock:
142
148
  self._ensure_worker()
143
149
  if self._worker_proc is None:
@@ -153,9 +159,12 @@ class EmbeddingService:
153
159
  try:
154
160
  self._worker_proc.stdin.write(req)
155
161
  self._worker_proc.stdin.flush()
156
- resp_line = self._worker_proc.stdout.readline()
162
+ resp_line = self._readline_with_timeout(
163
+ self._worker_proc.stdout,
164
+ _SUBPROCESS_RESPONSE_TIMEOUT,
165
+ )
157
166
  if not resp_line:
158
- logger.warning("Worker returned empty response, restarting")
167
+ logger.warning("Worker returned empty or timed out, restarting")
159
168
  self._kill_worker()
160
169
  return None
161
170
  resp = json.loads(resp_line)
@@ -169,6 +178,31 @@ class EmbeddingService:
169
178
  self._kill_worker()
170
179
  return None
171
180
 
181
+ @staticmethod
182
+ def _readline_with_timeout(stream, timeout_seconds: float) -> str:
183
+ """Read a line from stream with a timeout. Returns '' on timeout."""
184
+ result_container: list[str] = []
185
+ error_container: list[Exception] = []
186
+
187
+ def _read() -> None:
188
+ try:
189
+ result_container.append(stream.readline())
190
+ except Exception as exc:
191
+ error_container.append(exc)
192
+
193
+ reader = threading.Thread(target=_read, daemon=True)
194
+ reader.start()
195
+ reader.join(timeout=timeout_seconds)
196
+
197
+ if reader.is_alive():
198
+ logger.warning(
199
+ "Embedding worker did not respond within %ds", timeout_seconds,
200
+ )
201
+ return ""
202
+ if error_container:
203
+ raise error_container[0]
204
+ return result_container[0] if result_container else ""
205
+
172
206
  def _ensure_worker(self) -> None:
173
207
  """Spawn worker subprocess if not running."""
174
208
  if self._worker_proc is not None and self._worker_proc.poll() is None:
@@ -81,12 +81,7 @@ class MemoryEngine:
81
81
 
82
82
  self._db = DatabaseManager(self._config.db_path)
83
83
  self._db.initialize(schema)
84
- try:
85
- emb = EmbeddingService(self._config.embedding)
86
- self._embedder = emb if emb.is_available else None
87
- except Exception as exc:
88
- logger.warning("Embeddings unavailable (%s). BM25-only mode.", exc)
89
- self._embedder = None
84
+ self._embedder = self._init_embedder()
90
85
 
91
86
  if self._caps.llm_fact_extraction:
92
87
  self._llm = LLMBackbone(self._config.llm)
@@ -115,6 +110,76 @@ class MemoryEngine:
115
110
  logger.info("MemoryEngine initialized: mode=%s profile=%s",
116
111
  self._config.mode.value, self._profile_id)
117
112
 
113
def _init_embedder(self):
    """Pick and construct the embedding backend for this engine.

    Selection, in order:
      1. ``provider == "ollama"``: the result of ``_try_ollama_embedder``,
         which may be ``None``; no other backend is tried.
      2. ``provider`` is ``"cloud"`` or ``"sentence-transformers"``, or
         the config reports ``is_cloud``: ``EmbeddingService``.
      3. Anything else (auto-detect): try Ollama first, whatever the LLM
         provider is, then fall back to ``EmbeddingService``.

    Returns the embedder, or ``None`` when the chosen backend could not
    be created.
    """
    from superlocalmemory.core.embeddings import EmbeddingService

    settings = self._config.embedding
    choice = settings.provider

    # Explicit Ollama request.
    if choice == "ollama":
        return self._try_ollama_embedder(settings)

    # Explicit cloud / sentence-transformers both go through EmbeddingService.
    if choice in ("cloud", "sentence-transformers") or settings.is_cloud:
        return self._try_service_embedder(EmbeddingService, settings)

    # Auto-detect: prefer a running Ollama that has the embedding model,
    # which avoids the sentence-transformers subprocess cold start.
    detected = self._try_ollama_embedder(settings)
    if detected is None:
        return self._try_service_embedder(EmbeddingService, settings)
    logger.info("Auto-detected Ollama embeddings (fast path)")
    return detected
150
+
151
def _try_ollama_embedder(self, emb_cfg):
    """Build an ``OllamaEmbedder`` from ``emb_cfg`` if it is usable.

    Reads ``ollama_model``, ``ollama_base_url`` and ``dimension`` from
    ``emb_cfg``. Returns the embedder when it reports ``is_available``;
    otherwise logs a warning and returns ``None``. Any exception raised
    while importing or constructing it is logged and also yields ``None``.
    """
    try:
        from superlocalmemory.core.ollama_embedder import OllamaEmbedder

        candidate = OllamaEmbedder(
            model=emb_cfg.ollama_model,
            base_url=emb_cfg.ollama_base_url,
            dimension=emb_cfg.dimension,
        )
        if not candidate.is_available:
            logger.warning(
                "Ollama embedder not available (model=%s). Falling back.",
                emb_cfg.ollama_model,
            )
            return None
        logger.info("Using Ollama embeddings (%s)", emb_cfg.ollama_model)
        return candidate
    except Exception as failure:
        logger.warning("OllamaEmbedder init failed: %s", failure)
        return None
170
+
171
+ @staticmethod
172
+ def _try_service_embedder(cls, emb_cfg):
173
+ """Try to create an EmbeddingService. Returns it or None."""
174
+ try:
175
+ emb = cls(emb_cfg)
176
+ if emb.is_available:
177
+ return emb
178
+ logger.warning("EmbeddingService not available. BM25-only mode.")
179
+ except Exception as exc:
180
+ logger.warning("Embeddings unavailable (%s). BM25-only mode.", exc)
181
+ return None
182
+
118
183
  def store(
119
184
  self,
120
185
  content: str,
@@ -0,0 +1,179 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under the MIT License - see LICENSE file
3
+ # Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
4
+
5
+ """Ollama Embedding Provider — lightweight HTTP-based embeddings.
6
+
7
+ Uses Ollama's /api/embed endpoint for fast local embeddings without
8
+ loading PyTorch or sentence-transformers into the process.
9
+
10
+ Typical latency: <1 second (vs 30s cold start for sentence-transformers).
11
+ Memory: ~0 MB in the SLM process (Ollama manages its own memory).
12
+
13
+ Part of Qualixar | Author: Varun Pratap Bhardwaj
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import logging
19
+ from typing import TYPE_CHECKING
20
+
21
+ import numpy as np
22
+
23
+ if TYPE_CHECKING:
24
+ pass
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+ # Fisher variance constants (must match EmbeddingService)
29
+ _FISHER_VAR_MIN = 0.05
30
+ _FISHER_VAR_MAX = 2.0
31
+ _FISHER_VAR_RANGE = _FISHER_VAR_MAX - _FISHER_VAR_MIN
32
+
33
+ # Ollama connect/response timeouts
34
+ _CONNECT_TIMEOUT = 5.0
35
+ _RESPONSE_TIMEOUT = 30.0
36
+
37
+
38
+ class OllamaEmbedder:
39
+ """Embedding service backed by a local Ollama instance.
40
+
41
+ Drop-in replacement for EmbeddingService. Implements the same
42
+ public interface (embed, embed_batch, compute_fisher_params,
43
+ is_available, dimension) so the engine can swap transparently.
44
+ """
45
+
46
+ def __init__(
47
+ self,
48
+ model: str = "nomic-embed-text",
49
+ base_url: str = "http://localhost:11434",
50
+ dimension: int = 768,
51
+ ) -> None:
52
+ self._model = model
53
+ self._base_url = base_url.rstrip("/")
54
+ self._dimension = dimension
55
+ self._available: bool | None = None # lazy-checked
56
+
57
+ # ------------------------------------------------------------------
58
+ # Public interface (matches EmbeddingService)
59
+ # ------------------------------------------------------------------
60
+
61
+ @property
62
+ def is_available(self) -> bool:
63
+ """Check if Ollama is reachable and the model is pulled."""
64
+ if self._available is not None:
65
+ return self._available
66
+ self._available = self._check_availability()
67
+ return self._available
68
+
69
+ @property
70
+ def dimension(self) -> int:
71
+ return self._dimension
72
+
73
+ def embed(self, text: str) -> list[float] | None:
74
+ """Embed a single text. Returns normalized vector or None on failure."""
75
+ if not text or not text.strip():
76
+ raise ValueError("Cannot embed empty text")
77
+ try:
78
+ return self._call_ollama_embed(text)
79
+ except Exception as exc:
80
+ logger.warning("Ollama embed failed: %s", exc)
81
+ return None
82
+
83
+ def embed_batch(self, texts: list[str]) -> list[list[float] | None]:
84
+ """Embed a batch of texts. Uses the batch API when available."""
85
+ if not texts:
86
+ raise ValueError("Cannot embed empty batch")
87
+ try:
88
+ return self._call_ollama_embed_batch(texts)
89
+ except Exception as exc:
90
+ logger.warning("Ollama batch embed failed: %s", exc)
91
+ return [None] * len(texts)
92
+
93
+ def compute_fisher_params(
94
+ self, embedding: list[float],
95
+ ) -> tuple[list[float], list[float]]:
96
+ """Compute Fisher-Rao parameters from a raw embedding."""
97
+ arr = np.asarray(embedding, dtype=np.float64)
98
+ norm = float(np.linalg.norm(arr))
99
+ if norm < 1e-10:
100
+ mean = np.zeros(len(arr), dtype=np.float64)
101
+ variance = np.full(len(arr), _FISHER_VAR_MAX, dtype=np.float64)
102
+ return mean.tolist(), variance.tolist()
103
+ mean = arr / norm
104
+ abs_mean = np.abs(mean)
105
+ max_val = float(np.max(abs_mean)) + 1e-10
106
+ signal_strength = abs_mean / max_val
107
+ variance = _FISHER_VAR_MAX - _FISHER_VAR_RANGE * signal_strength
108
+ variance = np.clip(variance, _FISHER_VAR_MIN, _FISHER_VAR_MAX)
109
+ return mean.tolist(), variance.tolist()
110
+
111
+ def unload(self) -> None:
112
+ """No-op for Ollama (Ollama manages its own model lifecycle)."""
113
+
114
+ # ------------------------------------------------------------------
115
+ # Internal
116
+ # ------------------------------------------------------------------
117
+
118
+ def _check_availability(self) -> bool:
119
+ """Verify Ollama is running and has the embedding model."""
120
+ import httpx
121
+
122
+ try:
123
+ resp = httpx.get(
124
+ f"{self._base_url}/api/tags",
125
+ timeout=_CONNECT_TIMEOUT,
126
+ )
127
+ if resp.status_code != 200:
128
+ return False
129
+ models = resp.json().get("models", [])
130
+ model_names = [m.get("name", "").split(":")[0] for m in models]
131
+ model_base = self._model.split(":")[0]
132
+ if model_base not in model_names:
133
+ logger.info(
134
+ "Ollama running but model '%s' not found (have: %s)",
135
+ self._model, ", ".join(model_names),
136
+ )
137
+ return False
138
+ return True
139
+ except Exception as exc:
140
+ logger.debug("Ollama not reachable: %s", exc)
141
+ return False
142
+
143
+ def _call_ollama_embed(self, text: str) -> list[float]:
144
+ """Call Ollama embed endpoint for a single text."""
145
+ import httpx
146
+
147
+ resp = httpx.post(
148
+ f"{self._base_url}/api/embed",
149
+ json={"model": self._model, "input": [text]},
150
+ timeout=httpx.Timeout(_RESPONSE_TIMEOUT, connect=_CONNECT_TIMEOUT),
151
+ )
152
+ resp.raise_for_status()
153
+ data = resp.json()
154
+ # Ollama /api/embed returns {"embeddings": [[...]]}
155
+ vec = data["embeddings"][0]
156
+ return self._normalize(vec)
157
+
158
+ def _call_ollama_embed_batch(self, texts: list[str]) -> list[list[float] | None]:
159
+ """Call Ollama embed endpoint with batch input."""
160
+ import httpx
161
+
162
+ resp = httpx.post(
163
+ f"{self._base_url}/api/embed",
164
+ json={"model": self._model, "input": texts},
165
+ timeout=httpx.Timeout(_RESPONSE_TIMEOUT, connect=_CONNECT_TIMEOUT),
166
+ )
167
+ resp.raise_for_status()
168
+ data = resp.json()
169
+ vectors = data.get("embeddings", [])
170
+ return [self._normalize(v) for v in vectors]
171
+
172
+ @staticmethod
173
+ def _normalize(vec: list[float]) -> list[float]:
174
+ """L2-normalize embedding vector."""
175
+ arr = np.asarray(vec, dtype=np.float32)
176
+ norm = float(np.linalg.norm(arr))
177
+ if norm > 1e-10:
178
+ arr = arr / norm
179
+ return arr.tolist()
@@ -171,6 +171,8 @@ class TypeRouter:
171
171
 
172
172
  assert self._template_embeddings is not None
173
173
  fact_emb = self._embedder.embed(fact.content)
174
+ if fact_emb is None:
175
+ return self._classify_keywords(fact)
174
176
 
175
177
  best_type = FactType.SEMANTIC
176
178
  best_score = -1.0
@@ -23,7 +23,7 @@ from pydantic import BaseModel
23
23
  import uvicorn
24
24
 
25
25
  from superlocalmemory.server.security_middleware import SecurityHeadersMiddleware
26
- from superlocalmemory.server.ui import SLM_VERSION
26
+ from superlocalmemory.server.routes.helpers import SLM_VERSION
27
27
 
28
28
  logger = logging.getLogger("superlocalmemory.api_server")
29
29
 
@@ -16,6 +16,42 @@ from typing import Optional
16
16
  from fastapi import HTTPException
17
17
  from pydantic import BaseModel, Field
18
18
 
19
+
20
+ # ---------------------------------------------------------------------------
21
+ # Version detection (shared — avoids circular import between ui.py ↔ v3_api.py)
22
+ # ---------------------------------------------------------------------------
23
+
24
+ def _get_version() -> str:
25
+ """Read version from package.json / pyproject.toml / importlib."""
26
+ try:
27
+ import json as _json
28
+ pkg_root = Path(__file__).resolve().parent.parent.parent.parent
29
+ pkg_json = pkg_root / "package.json"
30
+ if pkg_json.exists():
31
+ with open(pkg_json) as f:
32
+ v = _json.load(f).get("version", "")
33
+ if v:
34
+ return v
35
+ except Exception:
36
+ pass
37
+ try:
38
+ import tomllib
39
+ toml_path = Path(__file__).resolve().parent.parent.parent.parent / "pyproject.toml"
40
+ if toml_path.exists():
41
+ with open(toml_path, "rb") as f:
42
+ return tomllib.load(f)["project"]["version"]
43
+ except Exception:
44
+ pass
45
+ try:
46
+ from importlib.metadata import version
47
+ return version("superlocalmemory")
48
+ except Exception:
49
+ pass
50
+ return "unknown"
51
+
52
+
53
+ SLM_VERSION = _get_version()
54
+
19
55
  # V3 paths (migrated from ~/.claude-memory to ~/.superlocalmemory)
20
56
  MEMORY_DIR = Path.home() / ".superlocalmemory"
21
57
  DB_PATH = MEMORY_DIR / "memory.db"
@@ -12,7 +12,7 @@ import os
12
12
  from pathlib import Path
13
13
  from fastapi import APIRouter, Request
14
14
  from fastapi.responses import JSONResponse
15
- from superlocalmemory.server.ui import SLM_VERSION
15
+ from superlocalmemory.server.routes.helpers import SLM_VERSION
16
16
 
17
17
  logger = logging.getLogger(__name__)
18
18
 
@@ -25,39 +25,7 @@ from datetime import datetime
25
25
  logger = logging.getLogger(__name__)
26
26
 
27
27
 
28
- def _get_version() -> str:
29
- """Read version from package.json (npm), pyproject.toml, or metadata."""
30
- import json as _json
31
- pkg_root = Path(__file__).resolve().parent.parent.parent.parent
32
- # 1. Try package.json FIRST (source of truth for npm installs)
33
- try:
34
- pkg_json = pkg_root / "package.json"
35
- if pkg_json.exists():
36
- with open(pkg_json) as f:
37
- v = _json.load(f).get("version", "")
38
- if v:
39
- return v
40
- except Exception:
41
- pass
42
- # 2. Try pyproject.toml (source of truth for pip installs)
43
- try:
44
- import tomllib
45
- toml_path = pkg_root / "pyproject.toml"
46
- if toml_path.exists():
47
- with open(toml_path, "rb") as f:
48
- return tomllib.load(f)["project"]["version"]
49
- except Exception:
50
- pass
51
- # 3. Fallback to importlib.metadata
52
- try:
53
- from importlib.metadata import version
54
- return version("superlocalmemory")
55
- except Exception:
56
- pass
57
- return "unknown"
58
-
59
-
60
- SLM_VERSION = _get_version()
28
+ from superlocalmemory.server.routes.helpers import SLM_VERSION # noqa: E402
61
29
 
62
30
  _script_dir = str(Path(__file__).parent.resolve())
63
31
  sys.path = [p for p in sys.path if p not in ("", _script_dir)]