superlocalmemory 3.0.17 → 3.0.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,193 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under the MIT License - see LICENSE file
3
+ # Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
4
+
5
+ """Subprocess recall worker — runs the full recall pipeline in isolation.
6
+
7
+ The dashboard/MCP main process NEVER imports torch, numpy, or the engine.
8
+ All heavy work (engine init, embedding, retrieval, reranking) happens here.
9
+
10
+ Protocol (JSON over stdin/stdout):
11
+ Request: {"cmd": "recall", "query": "...", "limit": 10}
12
+ Response: {"ok": true, "results": [...], "query_type": "...", ...}
13
+
14
+ Part of Qualixar | Author: Varun Pratap Bhardwaj
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import json
20
+ import os
21
+ import sys
22
+
23
# Force CPU BEFORE any torch import — these assignments only take effect if
# they run ahead of the first (transitive) torch/transformers import.
os.environ["CUDA_VISIBLE_DEVICES"] = ""  # hide all CUDA devices
os.environ["PYTORCH_MPS_HIGH_WATERMARK_RATIO"] = "0.0"  # no MPS memory reservation (Apple Silicon)
os.environ["PYTORCH_MPS_MEM_LIMIT"] = "0"  # disable the MPS memory limit
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"  # unsupported MPS ops fall back to CPU
os.environ["TOKENIZERS_PARALLELISM"] = "false"  # disable HuggingFace tokenizers parallelism
os.environ["TORCH_DEVICE"] = "cpu"  # app-level device override

# Lazily-initialized singleton engine; created on first request (see _get_engine).
_engine = None
32
+
33
+
34
def _get_engine():
    """Return the lazily-created singleton MemoryEngine.

    The heavy imports (config, engine) are deferred to the first call so the
    worker process stays lightweight until a real request arrives.
    """
    global _engine
    if _engine is not None:
        return _engine
    from superlocalmemory.core.config import SLMConfig
    from superlocalmemory.core.engine import MemoryEngine
    _engine = MemoryEngine(SLMConfig.load())
    _engine.initialize()
    return _engine
43
+
44
+
45
def _handle_recall(query: str, limit: int) -> dict:
    """Run the full recall pipeline and serialize results to plain JSON types.

    Parent memory texts for all hits are fetched in one batched DB call so
    the caller gets source context without extra round trips.
    """
    engine = _get_engine()
    response = engine.recall(query, limit=limit)
    top_hits = response.results[:limit]

    # Single batched lookup of original memory content for every hit.
    wanted = {hit.fact.memory_id for hit in top_hits if hit.fact.memory_id}
    memory_map = engine._db.get_memory_content_batch(list(wanted)) if wanted else {}

    serialized = [
        {
            "fact_id": hit.fact.fact_id,
            "memory_id": hit.fact.memory_id,
            "content": hit.fact.content[:300],
            "source_content": memory_map.get(hit.fact.memory_id, ""),
            "score": round(hit.score, 4),
            "confidence": round(hit.confidence, 4),
            "trust_score": round(hit.trust_score, 4),
            "channel_scores": {
                name: round(value, 4)
                for name, value in (hit.channel_scores or {}).items()
            },
        }
        for hit in top_hits
    ]
    return {
        "ok": True,
        "query": query,
        "query_type": response.query_type,
        "result_count": len(serialized),
        "retrieval_time_ms": round(response.retrieval_time_ms, 1),
        "results": serialized,
    }
75
+
76
+
77
def _handle_store(content: str, metadata: dict) -> dict:
    """Persist *content* through the engine and report created fact ids.

    The wire protocol tucks session_id inside metadata, but the engine takes
    it as a separate argument — pop it out (mutating metadata, as before).
    """
    engine = _get_engine()
    session = metadata.pop("session_id", "")
    fact_ids = engine.store(content, session_id=session, metadata=metadata)
    return {"ok": True, "fact_ids": fact_ids, "count": len(fact_ids)}
82
+
83
+
84
def _handle_get_memory_facts(memory_id: str) -> dict:
    """Return a memory's original text plus the atomic facts extracted from it."""
    engine = _get_engine()
    profile = engine.profile_id

    # Original (pre-extraction) memory content, empty string if absent.
    original = engine._db.get_memory_content_batch([memory_id]).get(memory_id, "")

    # Child facts derived from this memory for the active profile.
    fact_list = [
        {
            "fact_id": fact.fact_id,
            "content": fact.content,
            "fact_type": (
                fact.fact_type.value
                if hasattr(fact.fact_type, 'value')
                else str(fact.fact_type)
            ),
            "confidence": round(fact.confidence, 3),
            "created_at": fact.created_at,
        }
        for fact in engine._db.get_facts_by_memory_id(memory_id, profile)
    ]
    return {
        "ok": True,
        "memory_id": memory_id,
        "original_content": original,
        "facts": fact_list,
        "fact_count": len(fact_list),
    }
108
+
109
+
110
def _handle_summarize(texts: list[str], mode: str) -> dict:
    """Summarize *texts* (heuristic in Mode A, LLM in B/C).

    NOTE: *mode* is accepted for protocol compatibility but is not consulted
    here — the Summarizer picks heuristic-vs-LLM from the engine config.
    """
    from superlocalmemory.core.summarizer import Summarizer
    engine = _get_engine()
    summarizer = Summarizer(engine._config)
    members = [{"content": text} for text in texts]
    return {"ok": True, "summary": summarizer.summarize_cluster(members)}
119
+
120
+
121
def _handle_synthesize(query: str, facts: list[dict]) -> dict:
    """Produce an LLM-synthesized answer for *query* grounded in *facts*."""
    from superlocalmemory.core.summarizer import Summarizer
    engine = _get_engine()
    answer = Summarizer(engine._config).synthesize_answer(query, facts)
    return {"ok": True, "synthesis": answer}
128
+
129
+
130
def _handle_status() -> dict:
    """Report engine mode, active profile id, and stored fact count."""
    engine = _get_engine()
    profile = engine.profile_id
    return {
        "ok": True,
        "mode": engine._config.mode.value,
        "profile": profile,
        "fact_count": engine._db.get_fact_count(profile),
    }
140
+
141
+
142
def _worker_main() -> None:
    """Main loop: read newline-delimited JSON requests from stdin, answer on stdout.

    Exits on EOF or an explicit {"cmd": "quit"}. Handler failures are reported
    back to the caller as error responses instead of crashing the worker.
    """
    # Command dispatch table: each entry turns a request dict into a response.
    handlers = {
        "recall": lambda r: _handle_recall(r.get("query", ""), r.get("limit", 10)),
        "store": lambda r: _handle_store(r.get("content", ""), r.get("metadata", {})),
        "get_memory_facts": lambda r: _handle_get_memory_facts(r.get("memory_id", "")),
        "summarize": lambda r: _handle_summarize(r.get("texts", []), r.get("mode", "a")),
        "synthesize": lambda r: _handle_synthesize(r.get("query", ""), r.get("facts", [])),
        "status": lambda r: _handle_status(),
    }

    for raw in sys.stdin:
        raw = raw.strip()
        if not raw:
            continue
        try:
            req = json.loads(raw)
        except json.JSONDecodeError:
            _respond({"ok": False, "error": "Invalid JSON"})
            continue

        cmd = req.get("cmd", "")

        if cmd == "quit":
            break
        if cmd == "ping":
            _respond({"ok": True})
            continue

        handler = handlers.get(cmd)
        if handler is None:
            _respond({"ok": False, "error": f"Unknown command: {cmd}"})
            continue
        try:
            _respond(handler(req))
        except Exception as exc:
            _respond({"ok": False, "error": str(exc)})
185
+
186
+
187
+ def _respond(data: dict) -> None:
188
+ sys.stdout.write(json.dumps(data) + "\n")
189
+ sys.stdout.flush()
190
+
191
+
192
+ if __name__ == "__main__":
193
+ _worker_main()
@@ -0,0 +1,182 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under the MIT License - see LICENSE file
3
+ # Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
4
+
5
+ """Summarizer — Mode A heuristic + Mode B Ollama + Mode C OpenRouter.
6
+
7
+ Generates cluster summaries and search synthesis. All LLM failures
8
+ fall back to heuristic silently — never crashes the caller.
9
+
10
+ Part of Qualixar | Author: Varun Pratap Bhardwaj
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import logging
16
+ import os
17
+ import re
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
class Summarizer:
    """Produce cluster summaries and search-answer synthesis.

    Mode A uses a pure heuristic. Modes B/C call Ollama / OpenRouter and fall
    back silently (heuristic summary, or empty synthesis) on any LLM failure,
    so callers never see an exception from this class's public API.
    """

    def __init__(self, config) -> None:
        self._config = config
        mode = config.mode
        # Normalize enum-or-string mode to a plain lowercase string.
        self._mode = mode.value if hasattr(mode, 'value') else str(mode)

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def summarize_cluster(self, members: list[dict]) -> str:
        """Return a human-readable summary (2-3 sentences) for *members*.

        Args:
            members: Dicts each carrying a 'content' key; empty entries are
                skipped.

        Returns:
            Summary string; heuristic fallback when no LLM is usable.
        """
        texts = [member.get("content", "") for member in members if member.get("content")]
        if not texts:
            return "Empty cluster."
        if self._mode in ("b", "c") and self._has_llm():
            try:
                return self._call_llm(self._cluster_prompt(texts[:10]), max_tokens=150)
            except Exception as exc:
                logger.warning("LLM cluster summary failed, using heuristic: %s", exc)
        return self._heuristic_summary(texts[:5])

    def synthesize_answer(self, query: str, facts: list[dict]) -> str:
        """Return an LLM-synthesized answer for *query* from *facts*.

        Returns an empty string in Mode A (no LLM), when no fact content is
        available, or when the LLM call fails.
        """
        if self._mode == "a" or not self._has_llm():
            return ""
        texts = [fact.get("content", "") for fact in facts if fact.get("content")]
        if not texts:
            return ""
        try:
            return self._call_llm(self._synthesis_prompt(query, texts[:8]), max_tokens=250)
        except Exception as exc:
            logger.warning("LLM synthesis failed: %s", exc)
            return ""

    # ------------------------------------------------------------------
    # Heuristic (Mode A — always available)
    # ------------------------------------------------------------------

    def _heuristic_summary(self, texts: list[str]) -> str:
        """Join the first sentence of up to three texts, deduplicated, capped at 300 chars."""
        leads: list[str] = []
        for body in texts[:3]:
            lead = self._first_sentence(body)
            if lead and lead not in leads:
                leads.append(lead)
        if not leads:
            return "No summary available."
        return " ".join(leads)[:300]

    @staticmethod
    def _first_sentence(text: str) -> str:
        """Return the first sentence (ending in ., !, or ? followed by
        whitespace); otherwise the first 100 characters."""
        stripped = text.strip()
        found = re.match(r'^(.+?[.!?])\s', stripped)
        if found:
            return found.group(1).strip()
        return stripped[:100].strip()

    # ------------------------------------------------------------------
    # LLM calls (Mode B/C)
    # ------------------------------------------------------------------

    def _has_llm(self) -> bool:
        """Return True when the configured mode has a usable LLM backend."""
        if self._mode == "b":
            # Mode B assumes a local Ollama server is running.
            return True
        if self._mode == "c":
            key = (
                os.environ.get("OPENROUTER_API_KEY")
                or getattr(self._config.llm, 'api_key', None)
            )
            return bool(key)
        return False

    def _call_llm(self, prompt: str, max_tokens: int = 200) -> str:
        """Dispatch to the backend matching the configured mode (B → Ollama, else OpenRouter)."""
        if self._mode == "b":
            return self._call_ollama(prompt, max_tokens)
        return self._call_openrouter(prompt, max_tokens)

    def _call_ollama(self, prompt: str, max_tokens: int = 200) -> str:
        """POST to the local Ollama generate endpoint and return its text."""
        import httpx
        model = getattr(self._config.llm, 'model', None) or "llama3.1:8b"
        payload = {
            "model": model,
            "prompt": prompt,
            "stream": False,
            "options": {"num_predict": max_tokens, "temperature": 0.3},
        }
        with httpx.Client(timeout=httpx.Timeout(20.0)) as client:
            resp = client.post("http://localhost:11434/api/generate", json=payload)
            resp.raise_for_status()
            return resp.json().get("response", "").strip()

    def _call_openrouter(self, prompt: str, max_tokens: int = 200) -> str:
        """POST a chat completion to OpenRouter; return the first choice's text.

        Raises:
            RuntimeError: when no API key is configured.
        """
        import httpx
        api_key = (
            os.environ.get("OPENROUTER_API_KEY")
            or getattr(self._config.llm, 'api_key', None)
        )
        if not api_key:
            raise RuntimeError("No OpenRouter API key")
        model = (
            getattr(self._config.llm, 'model', None)
            or "meta-llama/llama-3.1-8b-instruct:free"
        )
        headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        }
        body = {
            "model": model,
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": max_tokens,
            "temperature": 0.3,
        }
        with httpx.Client(timeout=httpx.Timeout(20.0)) as client:
            resp = client.post(
                "https://openrouter.ai/api/v1/chat/completions",
                headers=headers,
                json=body,
            )
        resp.raise_for_status()
        choices = resp.json().get("choices", [])
        if not choices:
            return ""
        return choices[0].get("message", {}).get("content", "").strip()

    # ------------------------------------------------------------------
    # Prompt templates
    # ------------------------------------------------------------------

    @staticmethod
    def _cluster_prompt(texts: list[str]) -> str:
        """Build the cluster-summary prompt from numbered, truncated texts."""
        numbered = "\n".join(f"{i+1}. {t[:200]}" for i, t in enumerate(texts))
        return (
            "Summarize the following related memories in 2-3 concise sentences. "
            "Focus on the common theme and key facts.\n\n"
            f"Memories:\n{numbered}\n\n"
            "Summary:"
        )

    @staticmethod
    def _synthesis_prompt(query: str, texts: list[str]) -> str:
        """Build the answer-synthesis prompt from the query and bulleted facts."""
        bullets = "\n".join(f"- {t[:200]}" for t in texts)
        return (
            f"Based on these stored memories, answer the question concisely.\n\n"
            f"Question: {query}\n\n"
            f"Relevant memories:\n{bullets}\n\n"
            "Answer (2-3 sentences):"
        )
@@ -0,0 +1,209 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under the MIT License - see LICENSE file
3
+ # Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
4
+
5
+ """Recall worker pool — manages subprocess lifecycle for all callers.
6
+
7
+ Single shared worker process handles requests from dashboard, MCP, CLI.
8
+ Serializes concurrent requests via a threading lock (one at a time to
9
+ avoid interleaved stdout). Worker auto-kills after idle timeout.
10
+
11
+ Usage:
12
+ pool = WorkerPool.shared()
13
+ result = pool.recall("what is X?", limit=10)
14
+ result = pool.store("some content", metadata={})
15
+
16
+ Part of Qualixar | Author: Varun Pratap Bhardwaj
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import json
22
+ import logging
23
+ import os
24
+ import subprocess
25
+ import sys
26
+ import threading
27
+ import time
28
+
29
+ logger = logging.getLogger(__name__)
30
+
31
_IDLE_TIMEOUT = 120  # seconds of inactivity before the worker is auto-killed
_REQUEST_TIMEOUT = 60  # seconds max to wait for one worker response


class WorkerPool:
    """Manages a single recall_worker subprocess with idle auto-kill.

    Thread-safe: concurrent callers are serialized via an instance lock, so
    request/response lines on the worker's pipes can never interleave.
    The worker subprocess holds all heavy memory (PyTorch, engine); the
    calling process stays small. After _IDLE_TIMEOUT seconds without a
    request, the worker is killed and all its memory returns to the OS.
    """

    # Process-wide singleton, created lazily in shared() with
    # double-checked locking on _instance_lock.
    _instance: WorkerPool | None = None
    _instance_lock = threading.Lock()

    def __init__(self) -> None:
        # _lock serializes all worker I/O and guards _proc/_idle_timer mutation.
        self._lock = threading.Lock()
        self._proc: subprocess.Popen | None = None
        self._idle_timer: threading.Timer | None = None
        self._last_used: float = 0.0

    @classmethod
    def shared(cls) -> WorkerPool:
        """Get or create the singleton worker pool (double-checked locking)."""
        if cls._instance is None:
            with cls._instance_lock:
                if cls._instance is None:
                    cls._instance = cls()
        return cls._instance

    # ------------------------------------------------------------------
    # Public API — each call is one JSON request to the worker subprocess.
    # ------------------------------------------------------------------

    def recall(self, query: str, limit: int = 10) -> dict:
        """Run recall in worker subprocess. Returns result dict."""
        return self._send({"cmd": "recall", "query": query, "limit": limit})

    def store(self, content: str, metadata: dict | None = None) -> dict:
        """Run store in worker subprocess. Returns result dict."""
        return self._send({
            "cmd": "store", "content": content,
            "metadata": metadata or {},
        })

    def get_memory_facts(self, memory_id: str) -> dict:
        """Get original memory text + child atomic facts."""
        return self._send({"cmd": "get_memory_facts", "memory_id": memory_id})

    def summarize(self, texts: list[str]) -> dict:
        """Generate summary from texts (heuristic in A, LLM in B/C)."""
        return self._send({"cmd": "summarize", "texts": texts})

    def synthesize(self, query: str, facts: list[dict]) -> dict:
        """Generate synthesized answer from query + facts."""
        return self._send({"cmd": "synthesize", "query": query, "facts": facts})

    def status(self) -> dict:
        """Get engine status from worker."""
        return self._send({"cmd": "status"})

    def shutdown(self) -> None:
        """Gracefully kill the worker."""
        with self._lock:
            self._kill()

    @property
    def worker_pid(self) -> int | None:
        """PID of the worker process, or None if not running."""
        # poll() is None only while the child is still alive.
        if self._proc and self._proc.poll() is None:
            return self._proc.pid
        return None

    # ------------------------------------------------------------------
    # Internals
    # ------------------------------------------------------------------

    def _send(self, request: dict) -> dict:
        """Send one JSON request line to the worker and return its response.

        Thread-safe (whole exchange happens under the lock). Any timeout or
        communication failure kills the worker so the next call respawns a
        fresh one; the error is returned as an {"ok": False, ...} dict rather
        than raised.
        """
        with self._lock:
            self._ensure_worker()
            if self._proc is None:
                return {"ok": False, "error": "Worker failed to start"}

            req_line = json.dumps(request) + "\n"
            try:
                self._proc.stdin.write(req_line)
                self._proc.stdin.flush()

                # Read response with timeout.
                # NOTE(review): select() on the text-mode stdout sees fd
                # readiness only, not data already buffered inside the Python
                # stream object — safe here only because the protocol is
                # strictly one request line / one response line under the
                # lock. Also, selecting on pipes is POSIX behavior; confirm
                # on Windows.
                import selectors
                sel = selectors.DefaultSelector()
                sel.register(self._proc.stdout, selectors.EVENT_READ)
                ready = sel.select(timeout=_REQUEST_TIMEOUT)
                sel.close()

                if not ready:
                    logger.error("Worker timed out after %ds", _REQUEST_TIMEOUT)
                    self._kill()
                    return {"ok": False, "error": "Worker timed out"}

                resp_line = self._proc.stdout.readline()
                if not resp_line:
                    # EOF on stdout: the worker died mid-request.
                    logger.warning("Worker returned empty, restarting")
                    self._kill()
                    return {"ok": False, "error": "Worker died"}

                self._reset_idle_timer()
                return json.loads(resp_line)

            except (BrokenPipeError, OSError, json.JSONDecodeError) as exc:
                logger.warning("Worker communication failed: %s", exc)
                self._kill()
                return {"ok": False, "error": str(exc)}

    def _ensure_worker(self) -> None:
        """Spawn worker if not running. Caller must hold self._lock."""
        if self._proc is not None and self._proc.poll() is None:
            return
        self._proc = None
        try:
            # Copy of the parent env with CPU-forcing overrides, mirroring
            # the settings recall_worker applies at import time.
            env = {
                **os.environ,
                "CUDA_VISIBLE_DEVICES": "",
                "PYTORCH_MPS_HIGH_WATERMARK_RATIO": "0.0",
                "PYTORCH_MPS_MEM_LIMIT": "0",
                "PYTORCH_ENABLE_MPS_FALLBACK": "1",
                "TOKENIZERS_PARALLELISM": "false",
                "TORCH_DEVICE": "cpu",
            }
            # text=True + bufsize=1: line-buffered text pipes for the
            # newline-delimited JSON protocol.
            self._proc = subprocess.Popen(
                [sys.executable, "-m", "superlocalmemory.core.recall_worker"],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.DEVNULL,
                text=True,
                bufsize=1,
                env=env,
            )
            logger.info("Recall worker spawned (PID %d)", self._proc.pid)
        except Exception as exc:
            logger.error("Failed to spawn recall worker: %s", exc)
            self._proc = None

    def _kill(self) -> None:
        """Terminate worker (graceful quit, then SIGKILL). ALL memory freed to OS.

        Caller must hold self._lock. Also cancels any pending idle timer.
        """
        if self._idle_timer is not None:
            self._idle_timer.cancel()
            self._idle_timer = None
        if self._proc is not None:
            pid = self._proc.pid
            try:
                # Ask the worker to exit its stdin loop cleanly first.
                self._proc.stdin.write('{"cmd":"quit"}\n')
                self._proc.stdin.flush()
                self._proc.wait(timeout=3)
            except Exception:
                # Graceful path failed (dead pipe, hung worker) — force kill.
                try:
                    self._proc.kill()
                    self._proc.wait(timeout=2)
                except Exception:
                    pass
            self._proc = None
            logger.info("Recall worker killed (PID %s)", pid)

    def _reset_idle_timer(self) -> None:
        """(Re)arm the idle timer: kill the worker after _IDLE_TIMEOUT quiet seconds."""
        if self._idle_timer is not None:
            self._idle_timer.cancel()
        self._idle_timer = threading.Timer(_IDLE_TIMEOUT, self._idle_kill)
        # Daemon timer so a pending kill never blocks interpreter shutdown.
        self._idle_timer.daemon = True
        self._idle_timer.start()
        self._last_used = time.time()

    def _idle_kill(self) -> None:
        """Idle-timer callback — kill the worker to free memory."""
        with self._lock:
            if self._proc is not None:
                logger.info("Idle timeout — killing recall worker")
                self._kill()
@@ -11,6 +11,15 @@ Part of Qualixar | Author: Varun Pratap Bhardwaj
11
11
 
12
12
  from __future__ import annotations
13
13
 
14
+ # CRITICAL: Set BEFORE any torch/transformers import to prevent Metal/MPS
15
+ # GPU memory reservation on Apple Silicon.
16
+ import os as _os
17
+ _os.environ.setdefault('PYTORCH_MPS_HIGH_WATERMARK_RATIO', '0.0')
18
+ _os.environ.setdefault('PYTORCH_MPS_MEM_LIMIT', '0')
19
+ _os.environ.setdefault('PYTORCH_ENABLE_MPS_FALLBACK', '1')
20
+ _os.environ.setdefault('TOKENIZERS_PARALLELISM', 'false')
21
+ _os.environ.setdefault('TORCH_DEVICE', 'cpu')
22
+
14
23
  import logging
15
24
  import sys
16
25
 
@@ -35,10 +35,16 @@ def register_core_tools(server, get_engine: Callable) -> None:
35
35
  and indexes for 4-channel retrieval.
36
36
  """
37
37
  try:
38
- engine = get_engine()
39
- metadata: dict[str, Any] = {"tags": tags, "project": project, "importance": importance, "agent_id": agent_id}
40
- fact_ids = engine.store(content, session_id=session_id, metadata=metadata)
41
- return {"success": True, "fact_ids": fact_ids, "count": len(fact_ids)}
38
+ from superlocalmemory.core.worker_pool import WorkerPool
39
+ pool = WorkerPool.shared()
40
+ result = pool.store(content, metadata={
41
+ "tags": tags, "project": project,
42
+ "importance": importance, "agent_id": agent_id,
43
+ "session_id": session_id,
44
+ })
45
+ if result.get("ok"):
46
+ return {"success": True, "fact_ids": result.get("fact_ids", []), "count": result.get("count", 0)}
47
+ return {"success": False, "error": result.get("error", "Store failed")}
42
48
  except Exception as exc:
43
49
  logger.exception("remember failed")
44
50
  return {"success": False, "error": str(exc)}
@@ -47,10 +53,17 @@ def register_core_tools(server, get_engine: Callable) -> None:
47
53
  async def recall(query: str, limit: int = 10, agent_id: str = "mcp_client") -> dict:
48
54
  """Search memories by semantic query with 4-channel retrieval, RRF fusion, and reranking."""
49
55
  try:
50
- engine = get_engine()
51
- response = engine.recall(query, limit=limit)
52
- results = _format_results(response.results[:limit])
53
- return {"success": True, "results": results, "count": len(results), "query_type": response.query_type}
56
+ from superlocalmemory.core.worker_pool import WorkerPool
57
+ pool = WorkerPool.shared()
58
+ result = pool.recall(query, limit=limit)
59
+ if result.get("ok"):
60
+ return {
61
+ "success": True,
62
+ "results": result.get("results", []),
63
+ "count": result.get("result_count", 0),
64
+ "query_type": result.get("query_type", "unknown"),
65
+ }
66
+ return {"success": False, "error": result.get("error", "Recall failed")}
54
67
  except Exception as exc:
55
68
  logger.exception("recall failed")
56
69
  return {"success": False, "error": str(exc)}
@@ -23,6 +23,27 @@ UI_DIR = Path(__file__).parent.parent / "ui"
23
23
  PROFILES_DIR = MEMORY_DIR / "profiles"
24
24
 
25
25
 
26
def get_engine_lazy(app_state):
    """Get or lazily initialize the V3 engine on *app_state*.

    Returns the engine, or None if initialization failed. A failed attempt
    is remembered on app_state so it is not retried on every call.
    """
    cached = getattr(app_state, "engine", None)
    if cached is not None:
        return cached
    if getattr(app_state, "_engine_init_attempted", False):
        # A previous attempt already failed; don't pay the cost again.
        return None
    try:
        from superlocalmemory.core.config import SLMConfig
        from superlocalmemory.core.engine import MemoryEngine
        new_engine = MemoryEngine(SLMConfig.load())
        new_engine.initialize()
        app_state.engine = new_engine
        app_state._engine_init_attempted = True
        return new_engine
    except Exception:
        app_state._engine_init_attempted = True
        return None
45
+
46
+
26
47
  def get_db_connection() -> sqlite3.Connection:
27
48
  """Get database connection."""
28
49
  if not DB_PATH.exists():