superlocalmemory 3.3.18 → 3.3.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/pyproject.toml +1 -1
- package/scripts/postinstall.js +45 -19
- package/src/superlocalmemory/cli/commands.py +27 -3
- package/src/superlocalmemory/cli/main.py +13 -1
- package/src/superlocalmemory/cli/setup_wizard.py +403 -45
- package/src/superlocalmemory/core/config.py +1 -1
- package/src/superlocalmemory/core/embeddings.py +10 -3
- package/src/superlocalmemory/core/engine_wiring.py +44 -0
- package/src/superlocalmemory/mcp/server.py +60 -7
- package/src/superlocalmemory/mcp/tools_core.py +14 -6
- package/src/superlocalmemory/retrieval/bridge_discovery.py +5 -1
- package/src/superlocalmemory/retrieval/engine.py +4 -3
- package/src/superlocalmemory/retrieval/entity_channel.py +2 -2
- package/src/superlocalmemory/retrieval/quantization_aware_search.py +10 -2
- package/src/superlocalmemory/retrieval/semantic_channel.py +23 -5
- package/src/superlocalmemory/retrieval/spreading_activation.py +2 -2
- package/src/superlocalmemory/retrieval/strategy.py +40 -8
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "superlocalmemory",
|
|
3
|
-
"version": "3.3.18",
|
|
3
|
+
"version": "3.3.19",
|
|
4
4
|
"description": "Information-geometric agent memory with mathematical guarantees. 4-channel retrieval, Fisher-Rao similarity, zero-LLM mode, EU AI Act compliant. Works with Claude, Cursor, Windsurf, and 17+ AI tools.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"ai-memory",
|
package/pyproject.toml
CHANGED
package/scripts/postinstall.js
CHANGED
|
@@ -240,28 +240,54 @@ if (fs.existsSync(hooksDisabledFile)) {
|
|
|
240
240
|
}
|
|
241
241
|
}
|
|
242
242
|
|
|
243
|
+
// --- Step 6: Run interactive setup wizard ---
|
|
244
|
+
// Downloads embedding + reranker models, configures mode, verifies installation.
|
|
245
|
+
// If TTY is available (interactive terminal), runs the full wizard.
|
|
246
|
+
// If not (CI, piped), uses defaults (Mode A, skip model download).
|
|
247
|
+
console.log('\n════════════════════════════════════════════════════════════');
|
|
248
|
+
console.log(' Running setup wizard (model download + verification)...');
|
|
249
|
+
console.log('════════════════════════════════════════════════════════════\n');
|
|
250
|
+
|
|
251
|
+
const isTTY = process.stdin.isTTY && process.stdout.isTTY;
|
|
252
|
+
const setupArgs = isTTY ? ['setup'] : ['setup'];
|
|
253
|
+
const setupEnv = {
|
|
254
|
+
...process.env,
|
|
255
|
+
PATH: '/opt/homebrew/bin:/usr/local/bin:/usr/bin:' + (process.env.PATH || ''),
|
|
256
|
+
PYTHONPATH: path.join(__dirname, '..', 'src') + ':' + (process.env.PYTHONPATH || ''),
|
|
257
|
+
CUDA_VISIBLE_DEVICES: '',
|
|
258
|
+
TOKENIZERS_PARALLELISM: 'false',
|
|
259
|
+
TORCH_DEVICE: 'cpu',
|
|
260
|
+
};
|
|
261
|
+
|
|
262
|
+
// Non-interactive: set env flag so wizard uses defaults
|
|
263
|
+
if (!isTTY) {
|
|
264
|
+
setupEnv.SLM_NON_INTERACTIVE = '1';
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
const setupResult = spawnSync(pythonParts[0], [
|
|
268
|
+
...pythonParts.slice(1), '-m', 'superlocalmemory.cli.main', ...setupArgs,
|
|
269
|
+
], {
|
|
270
|
+
stdio: 'inherit', // Show all output including download progress
|
|
271
|
+
timeout: 900000, // 15 min (model downloads can be slow)
|
|
272
|
+
env: setupEnv,
|
|
273
|
+
});
|
|
274
|
+
|
|
275
|
+
if (setupResult.status === 0) {
|
|
276
|
+
console.log('✓ Setup wizard completed successfully');
|
|
277
|
+
} else {
|
|
278
|
+
console.log('⚠ Setup wizard had issues (run: slm setup)');
|
|
279
|
+
console.log(' SuperLocalMemory will still work — models download on first use.');
|
|
280
|
+
}
|
|
281
|
+
|
|
243
282
|
// --- Done ---
|
|
244
|
-
console.log('════════════════════════════════════════════════════════════');
|
|
245
|
-
console.log(' ✓ SuperLocalMemory V3 installed
|
|
283
|
+
console.log('\n════════════════════════════════════════════════════════════');
|
|
284
|
+
console.log(' ✓ SuperLocalMemory V3 installed!');
|
|
246
285
|
console.log('');
|
|
247
286
|
console.log(' Quick start:');
|
|
248
|
-
console.log('
|
|
249
|
-
console.log('');
|
|
250
|
-
console.log('
|
|
251
|
-
console.log(' slm
|
|
252
|
-
console.log(' slm warmup # Pre-download embedding model (~500MB)');
|
|
253
|
-
console.log(' slm remember "..." # Store a memory');
|
|
254
|
-
console.log(' slm recall "..." # Search memories');
|
|
255
|
-
console.log(' slm dashboard # Open 17-tab web dashboard');
|
|
256
|
-
console.log(' slm hooks status # Check hook installation');
|
|
257
|
-
console.log(' slm hooks remove # Opt out of auto-memory hooks');
|
|
258
|
-
console.log('');
|
|
259
|
-
console.log(' Prerequisites satisfied:');
|
|
260
|
-
console.log(' ✓ Python 3.11+');
|
|
261
|
-
console.log(' ✓ Core math & search libraries');
|
|
262
|
-
console.log(' ✓ Dashboard server (fastapi, uvicorn)');
|
|
263
|
-
console.log(' ✓ Learning engine (lightgbm)');
|
|
264
|
-
console.log(' ✓ Data directory (~/.superlocalmemory/)');
|
|
287
|
+
console.log(' slm remember "..." # Store a memory');
|
|
288
|
+
console.log(' slm recall "..." # Search memories');
|
|
289
|
+
console.log(' slm dashboard # Open web dashboard');
|
|
290
|
+
console.log(' slm setup # Re-run setup wizard');
|
|
265
291
|
console.log('');
|
|
266
292
|
console.log(' Docs: https://github.com/qualixar/superlocalmemory/wiki');
|
|
267
293
|
console.log('════════════════════════════════════════════════════════════\n');
|
package/src/superlocalmemory/cli/commands.py
CHANGED
|
@@ -68,11 +68,12 @@ def dispatch(args: Namespace) -> None:
|
|
|
68
68
|
# -- Setup & Config (no --json — interactive commands) ---------------------
|
|
69
69
|
|
|
70
70
|
|
|
71
|
-
def cmd_setup(
|
|
71
|
+
def cmd_setup(args: Namespace) -> None:
|
|
72
72
|
"""Run the interactive setup wizard."""
|
|
73
73
|
from superlocalmemory.cli.setup_wizard import run_wizard
|
|
74
74
|
|
|
75
|
-
run_wizard()
|
|
75
|
+
run_wizard(auto=getattr(args, "auto", False))
|
|
76
|
+
sys.exit(0) # Force clean exit (background threads from imports may linger)
|
|
76
77
|
|
|
77
78
|
|
|
78
79
|
def cmd_mode(args: Namespace) -> None:
|
|
@@ -249,9 +250,32 @@ def cmd_list(args: Namespace) -> None:
|
|
|
249
250
|
def cmd_remember(args: Namespace) -> None:
|
|
250
251
|
"""Store a memory via the engine."""
|
|
251
252
|
from superlocalmemory.core.config import SLMConfig
|
|
252
|
-
from superlocalmemory.core.engine import MemoryEngine
|
|
253
253
|
|
|
254
254
|
use_json = getattr(args, 'json', False)
|
|
255
|
+
fire_and_forget = getattr(args, 'fire_and_forget', False)
|
|
256
|
+
|
|
257
|
+
# V3.3.19: --async flag for hooks/scripts — spawn background process, return instantly
|
|
258
|
+
if fire_and_forget:
|
|
259
|
+
import subprocess
|
|
260
|
+
cmd = [sys.executable, "-m", "superlocalmemory.cli.main", "remember", args.content]
|
|
261
|
+
if args.tags:
|
|
262
|
+
cmd.extend(["--tags", args.tags])
|
|
263
|
+
if use_json:
|
|
264
|
+
cmd.append("--json")
|
|
265
|
+
# Spawn detached subprocess — parent exits immediately
|
|
266
|
+
subprocess.Popen(
|
|
267
|
+
cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
|
|
268
|
+
start_new_session=True,
|
|
269
|
+
)
|
|
270
|
+
if use_json:
|
|
271
|
+
from superlocalmemory.cli.json_output import json_print
|
|
272
|
+
json_print("remember", data={"queued": True, "async": True})
|
|
273
|
+
else:
|
|
274
|
+
print("Queued for background processing.")
|
|
275
|
+
return
|
|
276
|
+
|
|
277
|
+
from superlocalmemory.core.engine import MemoryEngine
|
|
278
|
+
|
|
255
279
|
try:
|
|
256
280
|
config = SLMConfig.load()
|
|
257
281
|
engine = MemoryEngine(config)
|
package/src/superlocalmemory/cli/main.py
CHANGED
|
@@ -100,7 +100,11 @@ def main() -> None:
|
|
|
100
100
|
help="Enable PreToolUse gate (experimental — blocks tools until session_init)",
|
|
101
101
|
)
|
|
102
102
|
|
|
103
|
-
sub.add_parser("setup", help="Interactive first-time setup wizard")
|
|
103
|
+
setup_p = sub.add_parser("setup", help="Interactive first-time setup wizard")
|
|
104
|
+
setup_p.add_argument(
|
|
105
|
+
"--auto", action="store_true",
|
|
106
|
+
help="Non-interactive mode: use defaults (for CI/scripts)",
|
|
107
|
+
)
|
|
104
108
|
|
|
105
109
|
mode_p = sub.add_parser("mode", help="Get or set operating mode (a/b/c)")
|
|
106
110
|
mode_p.add_argument(
|
|
@@ -130,6 +134,10 @@ def main() -> None:
|
|
|
130
134
|
remember_p.add_argument("content", help="Content to remember")
|
|
131
135
|
remember_p.add_argument("--tags", default="", help="Comma-separated tags")
|
|
132
136
|
remember_p.add_argument("--json", action="store_true", help="Output structured JSON (agent-native)")
|
|
137
|
+
remember_p.add_argument(
|
|
138
|
+
"--async", dest="fire_and_forget", action="store_true",
|
|
139
|
+
help="Return immediately, process in background (for hooks/scripts)",
|
|
140
|
+
)
|
|
133
141
|
|
|
134
142
|
recall_p = sub.add_parser("recall", help="Semantic search with 4-channel retrieval")
|
|
135
143
|
recall_p.add_argument("query", help="Search query")
|
|
@@ -262,6 +270,10 @@ def main() -> None:
|
|
|
262
270
|
parser.print_help()
|
|
263
271
|
sys.exit(0)
|
|
264
272
|
|
|
273
|
+
# V3.3.19: Auto-trigger setup wizard on first use
|
|
274
|
+
from superlocalmemory.cli.setup_wizard import check_first_use
|
|
275
|
+
check_first_use(args.command)
|
|
276
|
+
|
|
265
277
|
from superlocalmemory.cli.commands import dispatch
|
|
266
278
|
|
|
267
279
|
dispatch(args)
|
package/src/superlocalmemory/cli/setup_wizard.py
CHANGED
|
@@ -4,7 +4,11 @@
|
|
|
4
4
|
|
|
5
5
|
"""Interactive setup wizard for first-time configuration.
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
Runs automatically on first use of any `slm` command, or via `slm setup`.
|
|
8
|
+
Downloads models, configures mode, verifies installation.
|
|
9
|
+
|
|
10
|
+
For npm: triggered by postinstall.js after dependency installation.
|
|
11
|
+
For pip: triggered on first `slm` command when .setup-complete is missing.
|
|
8
12
|
|
|
9
13
|
Part of Qualixar | Author: Varun Pratap Bhardwaj
|
|
10
14
|
"""
|
|
@@ -12,98 +16,453 @@ Part of Qualixar | Author: Varun Pratap Bhardwaj
|
|
|
12
16
|
from __future__ import annotations
|
|
13
17
|
|
|
14
18
|
import os
|
|
19
|
+
import platform
|
|
15
20
|
import shutil
|
|
21
|
+
import subprocess
|
|
22
|
+
import sys
|
|
23
|
+
import time
|
|
24
|
+
from pathlib import Path
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# ---------------------------------------------------------------------------
|
|
28
|
+
# Constants
|
|
29
|
+
# ---------------------------------------------------------------------------
|
|
30
|
+
|
|
31
|
+
_SLM_HOME = Path(os.environ.get("SL_MEMORY_PATH", Path.home() / ".superlocalmemory"))
|
|
32
|
+
_SETUP_MARKER = _SLM_HOME / ".setup-complete"
|
|
33
|
+
_EMBED_MODEL = "nomic-ai/nomic-embed-text-v1.5"
|
|
34
|
+
_RERANKER_MODEL = "cross-encoder/ms-marco-MiniLM-L-12-v2"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# ---------------------------------------------------------------------------
|
|
38
|
+
# Detection helpers
|
|
39
|
+
# ---------------------------------------------------------------------------
|
|
40
|
+
|
|
41
|
+
def is_interactive() -> bool:
|
|
42
|
+
"""True if running in a terminal (not CI, not piped, not MCP)."""
|
|
43
|
+
if os.environ.get("CI"):
|
|
44
|
+
return False
|
|
45
|
+
if os.environ.get("SLM_NON_INTERACTIVE"):
|
|
46
|
+
return False
|
|
47
|
+
return sys.stdin.isatty() and sys.stdout.isatty()
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def is_setup_complete() -> bool:
|
|
51
|
+
"""True if the setup wizard has been run at least once."""
|
|
52
|
+
return _SETUP_MARKER.exists()
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def needs_setup() -> bool:
|
|
56
|
+
"""True if setup should auto-trigger (first use)."""
|
|
57
|
+
return not is_setup_complete()
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _prompt(message: str, default: str = "") -> str:
|
|
61
|
+
"""Prompt user for input. Returns default if non-interactive."""
|
|
62
|
+
if not is_interactive():
|
|
63
|
+
return default
|
|
64
|
+
try:
|
|
65
|
+
return input(message).strip() or default
|
|
66
|
+
except (EOFError, KeyboardInterrupt):
|
|
67
|
+
print()
|
|
68
|
+
return default
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _get_ram_gb() -> float:
|
|
72
|
+
"""Get total system RAM in GB."""
|
|
73
|
+
try:
|
|
74
|
+
import psutil
|
|
75
|
+
return psutil.virtual_memory().total / (1024 ** 3)
|
|
76
|
+
except ImportError:
|
|
77
|
+
pass
|
|
78
|
+
# Fallback: macOS
|
|
79
|
+
if platform.system() == "Darwin":
|
|
80
|
+
try:
|
|
81
|
+
out = subprocess.check_output(["sysctl", "-n", "hw.memsize"], text=True)
|
|
82
|
+
return int(out.strip()) / (1024 ** 3)
|
|
83
|
+
except Exception:
|
|
84
|
+
pass
|
|
85
|
+
# Fallback: Linux
|
|
86
|
+
try:
|
|
87
|
+
with open("/proc/meminfo") as f:
|
|
88
|
+
for line in f:
|
|
89
|
+
if line.startswith("MemTotal:"):
|
|
90
|
+
return int(line.split()[1]) / (1024 ** 2)
|
|
91
|
+
except Exception:
|
|
92
|
+
pass
|
|
93
|
+
return 0.0
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
# ---------------------------------------------------------------------------
|
|
97
|
+
# Model download
|
|
98
|
+
# ---------------------------------------------------------------------------
|
|
99
|
+
|
|
100
|
+
def _download_model(model_name: str, label: str) -> bool:
|
|
101
|
+
"""Download a HuggingFace model with visible progress.
|
|
16
102
|
|
|
103
|
+
Runs in a subprocess so the main process never loads torch.
|
|
104
|
+
stderr is inherited so the user sees download progress bars.
|
|
105
|
+
Returns True on success.
|
|
106
|
+
"""
|
|
107
|
+
print(f"\n Downloading {label}: {model_name}")
|
|
108
|
+
print(f" (this may take a few minutes on first run)\n")
|
|
109
|
+
|
|
110
|
+
script = (
|
|
111
|
+
f"import sys; "
|
|
112
|
+
f"from sentence_transformers import SentenceTransformer; "
|
|
113
|
+
f"m = SentenceTransformer('{model_name}', trust_remote_code=True); "
|
|
114
|
+
f"d = m.get_sentence_embedding_dimension(); "
|
|
115
|
+
f"print(f'OK dim={{d}}'); "
|
|
116
|
+
)
|
|
117
|
+
|
|
118
|
+
try:
|
|
119
|
+
result = subprocess.run(
|
|
120
|
+
[sys.executable, "-c", script],
|
|
121
|
+
timeout=600, # 10 min for large model downloads
|
|
122
|
+
capture_output=False, # Show download progress
|
|
123
|
+
text=True,
|
|
124
|
+
env={
|
|
125
|
+
**os.environ,
|
|
126
|
+
"CUDA_VISIBLE_DEVICES": "",
|
|
127
|
+
"TOKENIZERS_PARALLELISM": "false",
|
|
128
|
+
"TORCH_DEVICE": "cpu",
|
|
129
|
+
},
|
|
130
|
+
)
|
|
131
|
+
if result.returncode == 0:
|
|
132
|
+
print(f" ✓ {label} ready")
|
|
133
|
+
return True
|
|
134
|
+
print(f" ✗ {label} download failed (exit code {result.returncode})")
|
|
135
|
+
return False
|
|
136
|
+
except subprocess.TimeoutExpired:
|
|
137
|
+
print(f" ✗ {label} download timed out (10 min)")
|
|
138
|
+
return False
|
|
139
|
+
except FileNotFoundError:
|
|
140
|
+
print(f" ✗ Python not found: {sys.executable}")
|
|
141
|
+
return False
|
|
142
|
+
except Exception as exc:
|
|
143
|
+
print(f" ✗ {label} download error: {exc}")
|
|
144
|
+
return False
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def _download_reranker(model_name: str) -> bool:
|
|
148
|
+
"""Download cross-encoder reranker model."""
|
|
149
|
+
print(f"\n Downloading reranker: {model_name}")
|
|
150
|
+
print(f" (cross-encoder for result re-ranking)\n")
|
|
151
|
+
|
|
152
|
+
script = (
|
|
153
|
+
f"from sentence_transformers import CrossEncoder; "
|
|
154
|
+
f"m = CrossEncoder('{model_name}', trust_remote_code=True); "
|
|
155
|
+
f"print('OK'); "
|
|
156
|
+
)
|
|
157
|
+
|
|
158
|
+
try:
|
|
159
|
+
result = subprocess.run(
|
|
160
|
+
[sys.executable, "-c", script],
|
|
161
|
+
timeout=300,
|
|
162
|
+
capture_output=False,
|
|
163
|
+
text=True,
|
|
164
|
+
env={
|
|
165
|
+
**os.environ,
|
|
166
|
+
"CUDA_VISIBLE_DEVICES": "",
|
|
167
|
+
"TOKENIZERS_PARALLELISM": "false",
|
|
168
|
+
"TORCH_DEVICE": "cpu",
|
|
169
|
+
},
|
|
170
|
+
)
|
|
171
|
+
if result.returncode == 0:
|
|
172
|
+
print(f" ✓ Reranker ready")
|
|
173
|
+
return True
|
|
174
|
+
print(f" ✗ Reranker download failed")
|
|
175
|
+
return False
|
|
176
|
+
except Exception as exc:
|
|
177
|
+
print(f" ✗ Reranker error: {exc}")
|
|
178
|
+
return False
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
# ---------------------------------------------------------------------------
|
|
182
|
+
# Verification
|
|
183
|
+
# ---------------------------------------------------------------------------
|
|
184
|
+
|
|
185
|
+
def _verify_installation() -> bool:
|
|
186
|
+
"""Quick smoke test: embed a sentence, verify dimension."""
|
|
187
|
+
print("\n Running verification test...")
|
|
188
|
+
|
|
189
|
+
script = (
|
|
190
|
+
"from superlocalmemory.core.embeddings import EmbeddingService; "
|
|
191
|
+
"from superlocalmemory.core.config import EmbeddingConfig; "
|
|
192
|
+
"cfg = EmbeddingConfig(); "
|
|
193
|
+
"svc = EmbeddingService(cfg); "
|
|
194
|
+
"vec = svc.embed('SuperLocalMemory setup verification test'); "
|
|
195
|
+
"print(f'OK dim={len(vec)}' if vec else 'FAIL'); "
|
|
196
|
+
"svc.unload(); "
|
|
197
|
+
)
|
|
198
|
+
|
|
199
|
+
try:
|
|
200
|
+
result = subprocess.run(
|
|
201
|
+
[sys.executable, "-c", script],
|
|
202
|
+
timeout=120,
|
|
203
|
+
capture_output=True,
|
|
204
|
+
text=True,
|
|
205
|
+
env={
|
|
206
|
+
**os.environ,
|
|
207
|
+
"CUDA_VISIBLE_DEVICES": "",
|
|
208
|
+
"TOKENIZERS_PARALLELISM": "false",
|
|
209
|
+
"TORCH_DEVICE": "cpu",
|
|
210
|
+
},
|
|
211
|
+
)
|
|
212
|
+
stdout = result.stdout.strip()
|
|
213
|
+
if "OK dim=" in stdout:
|
|
214
|
+
dim = stdout.split("dim=")[1]
|
|
215
|
+
print(f" ✓ Embedding verified (dimension={dim})")
|
|
216
|
+
return True
|
|
217
|
+
print(f" ✗ Verification failed: {stdout}")
|
|
218
|
+
if result.stderr:
|
|
219
|
+
# Show last 3 lines of stderr for diagnosis
|
|
220
|
+
lines = result.stderr.strip().split("\n")
|
|
221
|
+
for line in lines[-3:]:
|
|
222
|
+
print(f" {line}")
|
|
223
|
+
return False
|
|
224
|
+
except subprocess.TimeoutExpired:
|
|
225
|
+
print(" ✗ Verification timed out (120s)")
|
|
226
|
+
return False
|
|
227
|
+
except Exception as exc:
|
|
228
|
+
print(f" ✗ Verification error: {exc}")
|
|
229
|
+
return False
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
# ---------------------------------------------------------------------------
|
|
233
|
+
# Mark setup complete
|
|
234
|
+
# ---------------------------------------------------------------------------
|
|
235
|
+
|
|
236
|
+
def _mark_complete() -> None:
|
|
237
|
+
"""Write .setup-complete marker file."""
|
|
238
|
+
_SLM_HOME.mkdir(parents=True, exist_ok=True)
|
|
239
|
+
_SETUP_MARKER.write_text(
|
|
240
|
+
f"setup_completed={time.strftime('%Y-%m-%dT%H:%M:%S')}\n"
|
|
241
|
+
f"python={sys.executable}\n"
|
|
242
|
+
f"platform={platform.system()}\n"
|
|
243
|
+
f"version={platform.python_version()}\n"
|
|
244
|
+
)
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
# ---------------------------------------------------------------------------
|
|
248
|
+
# Main wizard
|
|
249
|
+
# ---------------------------------------------------------------------------
|
|
250
|
+
|
|
251
|
+
def run_wizard(auto: bool = False) -> None:
|
|
252
|
+
"""Run the interactive setup wizard.
|
|
253
|
+
|
|
254
|
+
Args:
|
|
255
|
+
auto: If True, use defaults without prompting (for npm postinstall
|
|
256
|
+
or CI environments).
|
|
257
|
+
"""
|
|
258
|
+
interactive = is_interactive() and not auto
|
|
17
259
|
|
|
18
|
-
def run_wizard() -> None:
|
|
19
|
-
"""Run the interactive setup wizard."""
|
|
20
260
|
print()
|
|
21
|
-
print("
|
|
22
|
-
print("
|
|
261
|
+
print("╔══════════════════════════════════════════════════════════╗")
|
|
262
|
+
print("║ SuperLocalMemory V3 — Setup Wizard ║")
|
|
263
|
+
print("║ by Varun Pratap Bhardwaj / Qualixar ║")
|
|
264
|
+
print("╚══════════════════════════════════════════════════════════╝")
|
|
265
|
+
print()
|
|
266
|
+
|
|
267
|
+
# -- Step 1: System check --
|
|
268
|
+
print("─── Step 1/5: System Check ───")
|
|
269
|
+
print()
|
|
270
|
+
py_ver = platform.python_version()
|
|
271
|
+
py_ok = sys.version_info >= (3, 11)
|
|
272
|
+
ram_gb = _get_ram_gb()
|
|
273
|
+
print(f" Python: {py_ver} {'✓' if py_ok else '✗ (3.11+ required)'}")
|
|
274
|
+
print(f" Platform: {platform.system()} {platform.machine()}")
|
|
275
|
+
if ram_gb > 0:
|
|
276
|
+
print(f" RAM: {ram_gb:.1f} GB {'✓' if ram_gb >= 4 else '⚠ (4GB+ recommended)'}")
|
|
277
|
+
print(f" Data dir: {_SLM_HOME}")
|
|
278
|
+
|
|
279
|
+
# Check sentence-transformers
|
|
280
|
+
st_ok = False
|
|
281
|
+
try:
|
|
282
|
+
import sentence_transformers # noqa: F401
|
|
283
|
+
st_ok = True
|
|
284
|
+
print(f" sentence-transformers: ✓")
|
|
285
|
+
except ImportError:
|
|
286
|
+
print(f" sentence-transformers: ✗ (not installed)")
|
|
287
|
+
print(f" Run: pip install 'sentence-transformers>=4.0.0'")
|
|
288
|
+
|
|
289
|
+
if not py_ok:
|
|
290
|
+
print("\n ✗ Python 3.11+ is required. Please upgrade Python.")
|
|
291
|
+
print(" https://python.org/downloads/")
|
|
292
|
+
return
|
|
293
|
+
|
|
294
|
+
# -- Step 2: Mode selection --
|
|
23
295
|
print()
|
|
24
|
-
print("Choose
|
|
296
|
+
print("─── Step 2/5: Choose Operating Mode ───")
|
|
25
297
|
print()
|
|
26
|
-
print(" [A] Local Guardian (
|
|
27
|
-
print(" Zero cloud. Zero LLM.
|
|
298
|
+
print(" [A] Local Guardian (recommended)")
|
|
299
|
+
print(" Zero cloud. Zero LLM. Full privacy.")
|
|
28
300
|
print(" EU AI Act compliant. Works immediately.")
|
|
29
301
|
print()
|
|
30
302
|
print(" [B] Smart Local")
|
|
31
|
-
print(" Local LLM via Ollama for
|
|
32
|
-
print("
|
|
303
|
+
print(" Local LLM via Ollama for enrichment.")
|
|
304
|
+
print(" Data stays on your machine.")
|
|
33
305
|
print()
|
|
34
306
|
print(" [C] Full Power")
|
|
35
|
-
print(" Cloud LLM for
|
|
36
|
-
print(" Requires
|
|
307
|
+
print(" Cloud LLM for maximum accuracy.")
|
|
308
|
+
print(" Requires API key.")
|
|
37
309
|
print()
|
|
38
310
|
|
|
39
|
-
|
|
311
|
+
if interactive:
|
|
312
|
+
choice = _prompt(" Select mode [A/B/C] (default: A): ", "a").lower()
|
|
313
|
+
else:
|
|
314
|
+
choice = "a"
|
|
315
|
+
print(" Auto-selecting Mode A (non-interactive)")
|
|
40
316
|
|
|
41
317
|
if choice not in ("a", "b", "c"):
|
|
42
|
-
print(f"Invalid choice
|
|
318
|
+
print(f" Invalid choice '{choice}', using Mode A.")
|
|
43
319
|
choice = "a"
|
|
44
320
|
|
|
45
321
|
from superlocalmemory.core.config import SLMConfig
|
|
46
322
|
from superlocalmemory.storage.models import Mode
|
|
47
323
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
config.save()
|
|
51
|
-
print()
|
|
52
|
-
print("Mode A configured. Zero cloud, zero LLM.")
|
|
53
|
-
print(f"Config saved to: {config.base_dir / 'config.json'}")
|
|
324
|
+
mode_map = {"a": Mode.A, "b": Mode.B, "c": Mode.C}
|
|
325
|
+
config = SLMConfig.for_mode(mode_map[choice])
|
|
54
326
|
|
|
55
|
-
|
|
56
|
-
config = SLMConfig.for_mode(Mode.B)
|
|
327
|
+
if choice == "b":
|
|
57
328
|
print()
|
|
58
|
-
print("Checking for Ollama...")
|
|
59
329
|
if shutil.which("ollama"):
|
|
60
|
-
print(" Ollama found!")
|
|
330
|
+
print(" ✓ Ollama found!")
|
|
61
331
|
else:
|
|
62
|
-
print(" Ollama not found. Install
|
|
63
|
-
print("
|
|
64
|
-
config.save()
|
|
65
|
-
print(f"Config saved to: {config.base_dir / 'config.json'}")
|
|
332
|
+
print(" ⚠ Ollama not found. Install: https://ollama.ai")
|
|
333
|
+
print(" After installing: ollama pull llama3.2")
|
|
66
334
|
|
|
67
|
-
|
|
68
|
-
config = SLMConfig.for_mode(Mode.C)
|
|
335
|
+
if choice == "c" and interactive:
|
|
69
336
|
configure_provider(config)
|
|
337
|
+
else:
|
|
338
|
+
config.save()
|
|
339
|
+
|
|
340
|
+
mode_names = {"a": "Local Guardian", "b": "Smart Local", "c": "Full Power"}
|
|
341
|
+
print(f"\n ✓ Mode {choice.upper()} ({mode_names[choice]}) configured")
|
|
70
342
|
|
|
343
|
+
# -- Step 3: Download embedding model --
|
|
71
344
|
print()
|
|
72
|
-
print("
|
|
345
|
+
print("─── Step 3/5: Download Embedding Model ───")
|
|
346
|
+
|
|
347
|
+
if not st_ok:
|
|
348
|
+
print(" ⚠ Skipped (sentence-transformers not installed)")
|
|
349
|
+
print(" Models will download on first use.")
|
|
350
|
+
else:
|
|
351
|
+
embed_ok = _download_model(_EMBED_MODEL, "Embedding model")
|
|
352
|
+
if not embed_ok:
|
|
353
|
+
print(" ⚠ Model will download on first use (may take a few minutes)")
|
|
354
|
+
|
|
355
|
+
# -- Step 4: Download reranker model --
|
|
73
356
|
print()
|
|
357
|
+
print("─── Step 4/5: Download Reranker Model ───")
|
|
74
358
|
|
|
359
|
+
if not st_ok:
|
|
360
|
+
print(" ⚠ Skipped (sentence-transformers not installed)")
|
|
361
|
+
else:
|
|
362
|
+
_download_reranker(_RERANKER_MODEL)
|
|
75
363
|
|
|
76
|
-
|
|
77
|
-
|
|
364
|
+
# -- Step 5: Verification --
|
|
365
|
+
print()
|
|
366
|
+
print("─── Step 5/5: Verification ───")
|
|
78
367
|
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
368
|
+
if st_ok:
|
|
369
|
+
verified = _verify_installation()
|
|
370
|
+
else:
|
|
371
|
+
print(" ⚠ Skipped (sentence-transformers not installed)")
|
|
372
|
+
verified = False
|
|
373
|
+
|
|
374
|
+
# -- Done --
|
|
375
|
+
_mark_complete()
|
|
376
|
+
|
|
377
|
+
print()
|
|
378
|
+
print("╔══════════════════════════════════════════════════════════╗")
|
|
379
|
+
if verified:
|
|
380
|
+
print("║ ✓ Setup Complete — SuperLocalMemory is ready! ║")
|
|
381
|
+
else:
|
|
382
|
+
print("║ ✓ Setup Complete — basic config saved ║")
|
|
383
|
+
print("║ Models will auto-download on first use ║")
|
|
384
|
+
print("╚══════════════════════════════════════════════════════════╝")
|
|
385
|
+
print()
|
|
386
|
+
print(" Quick start:")
|
|
387
|
+
print(' slm remember "your first memory"')
|
|
388
|
+
print(' slm recall "search query"')
|
|
389
|
+
print(" slm dashboard")
|
|
390
|
+
print()
|
|
391
|
+
print(" Need help?")
|
|
392
|
+
print(" slm doctor — diagnose issues")
|
|
393
|
+
print(" slm --help — all commands")
|
|
394
|
+
print(" https://github.com/qualixar/superlocalmemory")
|
|
395
|
+
print()
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
# ---------------------------------------------------------------------------
|
|
399
|
+
# First-use auto-trigger
|
|
400
|
+
# ---------------------------------------------------------------------------
|
|
401
|
+
|
|
402
|
+
def check_first_use(command: str) -> None:
|
|
403
|
+
"""Check if setup is needed before running a command.
|
|
404
|
+
|
|
405
|
+
Called from main.py before dispatching any command.
|
|
406
|
+
Skips for commands that don't need setup (setup, hook, --version, --help).
|
|
82
407
|
"""
|
|
408
|
+
# Commands that work without setup
|
|
409
|
+
_SKIP_COMMANDS = {"setup", "init", "hook", "hooks", "reap", "mcp"}
|
|
410
|
+
if command in _SKIP_COMMANDS:
|
|
411
|
+
return
|
|
412
|
+
|
|
413
|
+
if is_setup_complete():
|
|
414
|
+
return
|
|
415
|
+
|
|
416
|
+
# Non-interactive: use defaults silently, don't block the command
|
|
417
|
+
if not is_interactive():
|
|
418
|
+
# Just create config with defaults and mark complete
|
|
419
|
+
try:
|
|
420
|
+
from superlocalmemory.core.config import SLMConfig
|
|
421
|
+
from superlocalmemory.storage.models import Mode
|
|
422
|
+
config = SLMConfig.for_mode(Mode.A)
|
|
423
|
+
config.save()
|
|
424
|
+
_mark_complete()
|
|
425
|
+
except Exception:
|
|
426
|
+
pass
|
|
427
|
+
return
|
|
428
|
+
|
|
429
|
+
# Interactive: run the full wizard
|
|
430
|
+
print()
|
|
431
|
+
print(" First time using SuperLocalMemory!")
|
|
432
|
+
print(" Running setup wizard...\n")
|
|
433
|
+
run_wizard()
|
|
434
|
+
|
|
435
|
+
|
|
436
|
+
# ---------------------------------------------------------------------------
|
|
437
|
+
# Mode C provider config (preserved from original)
|
|
438
|
+
# ---------------------------------------------------------------------------
|
|
439
|
+
|
|
440
|
+
def configure_provider(config: object) -> None:
|
|
441
|
+
"""Configure LLM provider for Mode C."""
|
|
83
442
|
from superlocalmemory.core.config import SLMConfig
|
|
84
443
|
from superlocalmemory.storage.models import Mode
|
|
85
444
|
|
|
86
445
|
presets = SLMConfig.provider_presets()
|
|
87
446
|
|
|
88
447
|
print()
|
|
89
|
-
print("Choose your LLM provider:")
|
|
448
|
+
print(" Choose your LLM provider:")
|
|
90
449
|
print()
|
|
91
450
|
providers = list(presets.keys())
|
|
92
451
|
for i, name in enumerate(providers, 1):
|
|
93
452
|
preset = presets[name]
|
|
94
|
-
print(f"
|
|
453
|
+
print(f" [{i}] {name.capitalize()} — {preset['model']}")
|
|
95
454
|
print()
|
|
96
455
|
|
|
97
|
-
idx =
|
|
456
|
+
idx = _prompt(f" Select provider [1-{len(providers)}]: ", "1")
|
|
98
457
|
try:
|
|
99
458
|
provider_name = providers[int(idx) - 1]
|
|
100
459
|
except (ValueError, IndexError):
|
|
101
|
-
print("Invalid choice. Using OpenAI.")
|
|
460
|
+
print(" Invalid choice. Using OpenAI.")
|
|
102
461
|
provider_name = "openai"
|
|
103
462
|
|
|
104
463
|
preset = presets[provider_name]
|
|
105
464
|
|
|
106
|
-
# Resolve API key
|
|
465
|
+
# Resolve API key
|
|
107
466
|
env_key = preset.get("env_key", "")
|
|
108
467
|
api_key = ""
|
|
109
468
|
if env_key:
|
|
@@ -111,10 +470,10 @@ def configure_provider(config: object) -> None:
|
|
|
111
470
|
if existing:
|
|
112
471
|
print(f" Found {env_key} in environment.")
|
|
113
472
|
api_key = existing
|
|
114
|
-
|
|
115
|
-
api_key =
|
|
473
|
+
elif is_interactive():
|
|
474
|
+
api_key = _prompt(
|
|
116
475
|
f" Enter your {provider_name.capitalize()} API key: ",
|
|
117
|
-
)
|
|
476
|
+
)
|
|
118
477
|
|
|
119
478
|
updated = SLMConfig.for_mode(
|
|
120
479
|
Mode.C,
|
|
@@ -126,4 +485,3 @@ def configure_provider(config: object) -> None:
|
|
|
126
485
|
updated.save()
|
|
127
486
|
print(f" Provider: {provider_name}")
|
|
128
487
|
print(f" Model: {preset['model']}")
|
|
129
|
-
print(f"Config saved to: {updated.base_dir / 'config.json'}")
|
package/src/superlocalmemory/core/config.py
CHANGED
|
@@ -619,7 +619,7 @@ class SLMConfig:
|
|
|
619
619
|
# The user's explicit choice always wins.
|
|
620
620
|
if "cross_encoder_backend" not in rt:
|
|
621
621
|
rt.setdefault("cross_encoder_model", "cross-encoder/ms-marco-MiniLM-L-12-v2")
|
|
622
|
-
rt["cross_encoder_backend"] = "
|
|
622
|
+
rt["cross_encoder_backend"] = "" # V3.3.18: PyTorch (ONNX CoreML leaks on ARM64)
|
|
623
623
|
# Only auto-enable if user didn't explicitly set the field
|
|
624
624
|
rt.setdefault("use_cross_encoder", True)
|
|
625
625
|
config.retrieval = RetrievalConfig(**{
|
package/src/superlocalmemory/core/embeddings.py
CHANGED
|
@@ -191,11 +191,18 @@ class EmbeddingService:
|
|
|
191
191
|
)
|
|
192
192
|
if not resp_line:
|
|
193
193
|
logger.warning(
|
|
194
|
-
"Embedding worker timed out after %ds.
|
|
195
|
-
"
|
|
196
|
-
"diagnose or 'slm warmup' to pre-download the model.",
|
|
194
|
+
"Embedding worker timed out after %ds. "
|
|
195
|
+
"Run 'slm setup' to download models and verify installation.",
|
|
197
196
|
_SUBPROCESS_RESPONSE_TIMEOUT,
|
|
198
197
|
)
|
|
198
|
+
# Print to stderr so CLI users see this even without logging
|
|
199
|
+
print(
|
|
200
|
+
f"\n⚠ Embedding worker did not respond within "
|
|
201
|
+
f"{_SUBPROCESS_RESPONSE_TIMEOUT}s.\n"
|
|
202
|
+
f" Run: slm setup (download models + verify)\n"
|
|
203
|
+
f" Run: slm doctor (diagnose issues)\n",
|
|
204
|
+
file=sys.stderr,
|
|
205
|
+
)
|
|
199
206
|
self._kill_worker()
|
|
200
207
|
return None
|
|
201
208
|
resp = json.loads(resp_line)
|
package/src/superlocalmemory/core/engine_wiring.py
CHANGED
|
@@ -394,6 +394,46 @@ def _init_hopfield_channel(
|
|
|
394
394
|
return None
|
|
395
395
|
|
|
396
396
|
|
|
397
|
+
def _init_quantization_aware_search(
|
|
398
|
+
vector_store: Any,
|
|
399
|
+
db: DatabaseManager,
|
|
400
|
+
config: SLMConfig,
|
|
401
|
+
) -> Any | None:
|
|
402
|
+
"""Create QuantizationAwareSearch if quantized store is available.
|
|
403
|
+
|
|
404
|
+
Returns None on failure — SemanticChannel falls back to VectorStore KNN.
|
|
405
|
+
Stateless wrapper: zero memory overhead, no workers, no threads.
|
|
406
|
+
"""
|
|
407
|
+
if vector_store is None:
|
|
408
|
+
return None
|
|
409
|
+
try:
|
|
410
|
+
from superlocalmemory.retrieval.quantization_aware_search import QuantizationAwareSearch
|
|
411
|
+
from superlocalmemory.storage.quantized_store import QuantizedEmbeddingStore
|
|
412
|
+
from superlocalmemory.math.polar_quant import PolarQuantEncoder
|
|
413
|
+
from superlocalmemory.math.qjl import QJLEncoder
|
|
414
|
+
|
|
415
|
+
polar = PolarQuantEncoder(dimension=config.embedding.dimension)
|
|
416
|
+
qjl: QJLEncoder | None = None
|
|
417
|
+
try:
|
|
418
|
+
qjl = QJLEncoder(dimension=config.embedding.dimension)
|
|
419
|
+
except Exception:
|
|
420
|
+
pass # QJL is optional (HR-07)
|
|
421
|
+
|
|
422
|
+
q_store = QuantizedEmbeddingStore(
|
|
423
|
+
db=db, polar=polar, qjl=qjl, config=config.quantization,
|
|
424
|
+
)
|
|
425
|
+
qas = QuantizationAwareSearch(
|
|
426
|
+
vector_store=vector_store,
|
|
427
|
+
quantized_store=q_store,
|
|
428
|
+
config=config.quantization,
|
|
429
|
+
)
|
|
430
|
+
logger.info("QuantizationAwareSearch initialized (TurboQuant 3-tier search)")
|
|
431
|
+
return qas
|
|
432
|
+
except Exception as exc:
|
|
433
|
+
logger.debug("QuantizationAwareSearch init failed (non-fatal): %s", exc)
|
|
434
|
+
return None
|
|
435
|
+
|
|
436
|
+
|
|
397
437
|
def init_retrieval(
|
|
398
438
|
config: SLMConfig,
|
|
399
439
|
db: DatabaseManager,
|
|
@@ -412,6 +452,9 @@ def init_retrieval(
|
|
|
412
452
|
from superlocalmemory.retrieval.profile_channel import ProfileChannel
|
|
413
453
|
from superlocalmemory.retrieval.bridge_discovery import BridgeDiscovery
|
|
414
454
|
|
|
455
|
+
# V3.3.19: TurboQuant 3-tier search (stateless, zero memory overhead)
|
|
456
|
+
qas = _init_quantization_aware_search(vector_store, db, config)
|
|
457
|
+
|
|
415
458
|
channels: dict = {
|
|
416
459
|
"semantic": SemanticChannel(
|
|
417
460
|
db,
|
|
@@ -419,6 +462,7 @@ def init_retrieval(
|
|
|
419
462
|
embedder=embedder,
|
|
420
463
|
fisher_mode=config.math.fisher_mode,
|
|
421
464
|
vector_store=vector_store,
|
|
465
|
+
quantization_aware_search=qas,
|
|
422
466
|
),
|
|
423
467
|
"bm25": BM25Channel(db),
|
|
424
468
|
"entity_graph": EntityGraphChannel(db, entity_resolver),
|
|
@@ -53,7 +53,60 @@ def reset_engine():
|
|
|
53
53
|
_engine = None
|
|
54
54
|
|
|
55
55
|
|
|
56
|
-
# Register
|
|
56
|
+
# Register tools and resources -------------------------------------------------
|
|
57
|
+
#
|
|
58
|
+
# V3.3.19: Trimmed from 38 tools to 17 essential tools.
|
|
59
|
+
# IDEs cap at 50-100 tools total (Cursor, Antigravity, Windsurf).
|
|
60
|
+
# 38 tools from SLM alone crowd out other MCP servers.
|
|
61
|
+
#
|
|
62
|
+
# Essential 17: the tools an AI agent actually needs during a session.
|
|
63
|
+
# Admin/diagnostics tools remain available via CLI (`slm <command>`).
|
|
64
|
+
# Set SLM_MCP_ALL_TOOLS=1 to enable all 38 tools (power users).
|
|
65
|
+
|
|
66
|
+
import os as _os_reg
|
|
67
|
+
|
|
68
|
+
_ESSENTIAL_TOOLS: frozenset[str] = frozenset({
|
|
69
|
+
# Core memory operations (8)
|
|
70
|
+
"remember", "recall", "search", "fetch",
|
|
71
|
+
"list_recent", "delete_memory", "update_memory", "get_status",
|
|
72
|
+
# Session lifecycle (3)
|
|
73
|
+
"session_init", "observe", "close_session",
|
|
74
|
+
# Memory management (2)
|
|
75
|
+
"forget", "run_maintenance",
|
|
76
|
+
# Infinite memory + learning (4)
|
|
77
|
+
"consolidate_cognitive", "get_soft_prompts",
|
|
78
|
+
"set_mode", "report_outcome",
|
|
79
|
+
})
|
|
80
|
+
|
|
81
|
+
_all_tools = _os_reg.environ.get("SLM_MCP_ALL_TOOLS") == "1"
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class _FilteredServer:
|
|
85
|
+
"""Wraps FastMCP to only register essential tools.
|
|
86
|
+
|
|
87
|
+
Non-essential tools are silently skipped (not registered on the MCP
|
|
88
|
+
server). They remain available via CLI. When SLM_MCP_ALL_TOOLS=1,
|
|
89
|
+
all tools are registered (bypass filter).
|
|
90
|
+
"""
|
|
91
|
+
__slots__ = ("_server", "_allowed")
|
|
92
|
+
|
|
93
|
+
def __init__(self, real_server: FastMCP, allowed: frozenset[str]) -> None:
|
|
94
|
+
self._server = real_server
|
|
95
|
+
self._allowed = allowed
|
|
96
|
+
|
|
97
|
+
def tool(self, *args, **kwargs):
|
|
98
|
+
def decorator(func):
|
|
99
|
+
if func.__name__ in self._allowed:
|
|
100
|
+
return self._server.tool(*args, **kwargs)(func)
|
|
101
|
+
return func # Skip registration — still importable, just not MCP-visible
|
|
102
|
+
return decorator
|
|
103
|
+
|
|
104
|
+
def __getattr__(self, name):
|
|
105
|
+
return getattr(self._server, name)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
# Choose full or filtered registration target
|
|
109
|
+
_target = server if _all_tools else _FilteredServer(server, _ESSENTIAL_TOOLS)
|
|
57
110
|
|
|
58
111
|
from superlocalmemory.mcp.tools_core import register_core_tools
|
|
59
112
|
from superlocalmemory.mcp.tools_v28 import register_v28_tools
|
|
@@ -62,12 +115,12 @@ from superlocalmemory.mcp.tools_active import register_active_tools
|
|
|
62
115
|
from superlocalmemory.mcp.tools_v33 import register_v33_tools
|
|
63
116
|
from superlocalmemory.mcp.resources import register_resources
|
|
64
117
|
|
|
65
|
-
register_core_tools(
|
|
66
|
-
register_v28_tools(
|
|
67
|
-
register_v3_tools(
|
|
68
|
-
register_active_tools(
|
|
69
|
-
register_v33_tools(
|
|
70
|
-
register_resources(server, get_engine)
|
|
118
|
+
register_core_tools(_target, get_engine)
|
|
119
|
+
register_v28_tools(_target, get_engine)
|
|
120
|
+
register_v3_tools(_target, get_engine)
|
|
121
|
+
register_active_tools(_target, get_engine)
|
|
122
|
+
register_v33_tools(_target, get_engine)
|
|
123
|
+
register_resources(server, get_engine) # Resources always registered (not tools)
|
|
71
124
|
|
|
72
125
|
|
|
73
126
|
if __name__ == "__main__":
|
|
@@ -95,14 +95,20 @@ def register_core_tools(server, get_engine: Callable) -> None:
|
|
|
95
95
|
Extracts atomic facts, resolves entities, builds graph edges,
|
|
96
96
|
and indexes for 4-channel retrieval.
|
|
97
97
|
"""
|
|
98
|
+
import asyncio
|
|
98
99
|
try:
|
|
99
100
|
from superlocalmemory.core.worker_pool import WorkerPool
|
|
100
101
|
pool = WorkerPool.shared()
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
102
|
+
# V3.3.19: Run store in thread pool so it doesn't block the
|
|
103
|
+
# MCP event loop. Before this fix, every remember call blocked
|
|
104
|
+
# the IDE/agent for 11-17s in Mode B (Ollama LLM fact extraction).
|
|
105
|
+
result = await asyncio.to_thread(
|
|
106
|
+
pool.store, content, metadata={
|
|
107
|
+
"tags": tags, "project": project,
|
|
108
|
+
"importance": importance, "agent_id": agent_id,
|
|
109
|
+
"session_id": session_id,
|
|
110
|
+
},
|
|
111
|
+
)
|
|
106
112
|
if result.get("ok"):
|
|
107
113
|
_emit_event("memory.created", {
|
|
108
114
|
"content_preview": content[:80],
|
|
@@ -118,10 +124,12 @@ def register_core_tools(server, get_engine: Callable) -> None:
|
|
|
118
124
|
@server.tool()
|
|
119
125
|
async def recall(query: str, limit: int = 10, agent_id: str = "mcp_client") -> dict:
|
|
120
126
|
"""Search memories by semantic query with 4-channel retrieval, RRF fusion, and reranking."""
|
|
127
|
+
import asyncio
|
|
121
128
|
try:
|
|
122
129
|
from superlocalmemory.core.worker_pool import WorkerPool
|
|
123
130
|
pool = WorkerPool.shared()
|
|
124
|
-
|
|
131
|
+
# V3.3.19: Run in thread pool to avoid blocking MCP event loop
|
|
132
|
+
result = await asyncio.to_thread(pool.recall, query, limit=limit)
|
|
125
133
|
if result.get("ok"):
|
|
126
134
|
# Record implicit feedback: every returned result is a recall_hit
|
|
127
135
|
try:
|
|
@@ -102,7 +102,11 @@ class BridgeDiscovery:
|
|
|
102
102
|
for f in entity_facts[:5]:
|
|
103
103
|
if f.fact_id not in seen:
|
|
104
104
|
seen.add(f.fact_id)
|
|
105
|
-
|
|
105
|
+
overlap = (
|
|
106
|
+
len(set(f.canonical_entities) & entities_a)
|
|
107
|
+
+ len(set(f.canonical_entities) & entities_b)
|
|
108
|
+
)
|
|
109
|
+
bridges.append((f.fact_id, min(1.0, 0.5 + overlap * 0.15)))
|
|
106
110
|
|
|
107
111
|
if len(bridges) >= max_bridges:
|
|
108
112
|
break
|
|
@@ -146,13 +146,14 @@ class RetrievalEngine:
|
|
|
146
146
|
fused = weighted_rrf(ch_results, strat.weights, k=self._config.rrf_k)
|
|
147
147
|
|
|
148
148
|
# Bridge discovery for multi-hop queries
|
|
149
|
+
# V3.3.19: Only bridge.discover() (86ms). Removed bridge.spreading_activation()
|
|
150
|
+
# which did per-node SQL queries across 254K edges → 78s latency.
|
|
151
|
+
# The SYNAPSE SA channel already provides proper SA with in-memory caching.
|
|
149
152
|
if self._bridge is not None and strat.query_type in ("multi_hop", "entity", "factual", "general"):
|
|
150
153
|
try:
|
|
151
154
|
seed_ids = [fr.fact_id for fr in fused[:10]]
|
|
152
155
|
bridges = self._bridge.discover(seed_ids, profile_id, max_bridges=10)
|
|
153
|
-
|
|
154
|
-
extra = bridges + spread
|
|
155
|
-
for fid, score in extra:
|
|
156
|
+
for fid, score in bridges:
|
|
156
157
|
if not any(fr.fact_id == fid for fr in fused):
|
|
157
158
|
fused.append(FusionResult(
|
|
158
159
|
fact_id=fid, fused_score=score * 0.8,
|
|
@@ -89,8 +89,8 @@ class EntityGraphChannel:
|
|
|
89
89
|
def __init__(
|
|
90
90
|
self, db: DatabaseManager,
|
|
91
91
|
entity_resolver: EntityResolver | None = None,
|
|
92
|
-
decay: float = 0.7, activation_threshold: float = 0.
|
|
93
|
-
max_hops: int =
|
|
92
|
+
decay: float = 0.7, activation_threshold: float = 0.05,
|
|
93
|
+
max_hops: int = 4,
|
|
94
94
|
) -> None:
|
|
95
95
|
self._db = db
|
|
96
96
|
self._resolver = entity_resolver
|
|
@@ -98,7 +98,11 @@ class QuantizationAwareSearch:
|
|
|
98
98
|
) -> list[tuple[str, float]]:
|
|
99
99
|
"""Tier 1: float32 exact cosine via VectorStore."""
|
|
100
100
|
try:
|
|
101
|
-
return self._vector_store.search(
|
|
101
|
+
return self._vector_store.search(
|
|
102
|
+
query_embedding=list(query) if hasattr(query, 'tolist') else query,
|
|
103
|
+
top_k=top_k,
|
|
104
|
+
profile_id=profile_id,
|
|
105
|
+
)
|
|
102
106
|
except Exception as exc:
|
|
103
107
|
logger.debug("float32 search failed: %s", exc)
|
|
104
108
|
return []
|
|
@@ -109,9 +113,13 @@ class QuantizationAwareSearch:
|
|
|
109
113
|
"""Tier 2: int8 approximate via VectorStore.search_int8.
|
|
110
114
|
|
|
111
115
|
Applies 0.98x penalty to account for int8 quantization error.
|
|
116
|
+
Gracefully returns [] if VectorStore lacks search_int8 method.
|
|
112
117
|
"""
|
|
118
|
+
fn = getattr(self._vector_store, "search_int8", None)
|
|
119
|
+
if fn is None:
|
|
120
|
+
return []
|
|
113
121
|
try:
|
|
114
|
-
raw =
|
|
122
|
+
raw = fn(query, profile_id=profile_id, top_k=top_k)
|
|
115
123
|
return [(fid, score * _INT8_PENALTY) for fid, score in raw]
|
|
116
124
|
except Exception as exc:
|
|
117
125
|
logger.debug("int8 search failed: %s", exc)
|
|
@@ -84,6 +84,7 @@ class SemanticChannel:
|
|
|
84
84
|
embedder: object | None = None,
|
|
85
85
|
fisher_mode: str = "simplified",
|
|
86
86
|
vector_store: Any | None = None,
|
|
87
|
+
quantization_aware_search: Any | None = None,
|
|
87
88
|
) -> None:
|
|
88
89
|
self._db = db
|
|
89
90
|
self._temperature = fisher_temperature
|
|
@@ -92,6 +93,8 @@ class SemanticChannel:
|
|
|
92
93
|
# Lazily instantiated full metric (avoids import cost when not needed)
|
|
93
94
|
self._full_metric: object | None = None
|
|
94
95
|
self._vector_store = vector_store
|
|
96
|
+
# V3.3.19: TurboQuant 3-tier search (stateless, optional)
|
|
97
|
+
self._qas = quantization_aware_search
|
|
95
98
|
|
|
96
99
|
def search(
|
|
97
100
|
self,
|
|
@@ -137,11 +140,26 @@ class SemanticChannel:
|
|
|
137
140
|
profile_id: str,
|
|
138
141
|
top_k: int,
|
|
139
142
|
) -> list[tuple[str, float]]:
|
|
140
|
-
"""KNN via VectorStore, then Fisher-Rao re-scoring
|
|
141
|
-
#
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
143
|
+
"""KNN via VectorStore (or QAS 3-tier), then Fisher-Rao re-scoring."""
|
|
144
|
+
# V3.3.19: Try TurboQuant 3-tier search first (float32 + int8 + polar)
|
|
145
|
+
if self._qas is not None:
|
|
146
|
+
try:
|
|
147
|
+
knn_results = self._qas.search(
|
|
148
|
+
query_embedding=q_vec, profile_id=profile_id,
|
|
149
|
+
top_k=top_k * 2,
|
|
150
|
+
)
|
|
151
|
+
except Exception:
|
|
152
|
+
knn_results = []
|
|
153
|
+
# Fall through to VectorStore if QAS returned nothing
|
|
154
|
+
if not knn_results:
|
|
155
|
+
knn_results = self._vector_store.search(
|
|
156
|
+
query_embedding, top_k=top_k * 2, profile_id=profile_id,
|
|
157
|
+
)
|
|
158
|
+
else:
|
|
159
|
+
# Step 1: Fast KNN -- get 2x top_k candidates for Fisher re-ranking
|
|
160
|
+
knn_results = self._vector_store.search(
|
|
161
|
+
query_embedding, top_k=top_k * 2, profile_id=profile_id,
|
|
162
|
+
)
|
|
145
163
|
if not knn_results:
|
|
146
164
|
return [] # Caller falls through to full scan
|
|
147
165
|
|
|
@@ -97,7 +97,7 @@ class SpreadingActivation:
|
|
|
97
97
|
try:
|
|
98
98
|
# Step 0: Get seed nodes from VectorStore KNN
|
|
99
99
|
seed_results = self._vector_store.search(
|
|
100
|
-
query, top_k=self._config.top_m,
|
|
100
|
+
query, top_k=self._config.top_m, profile_id=profile_id,
|
|
101
101
|
)
|
|
102
102
|
if not seed_results:
|
|
103
103
|
return []
|
|
@@ -125,7 +125,7 @@ class SpreadingActivation:
|
|
|
125
125
|
return results[:top_k]
|
|
126
126
|
|
|
127
127
|
except Exception as exc:
|
|
128
|
-
logger.
|
|
128
|
+
logger.warning(
|
|
129
129
|
"SpreadingActivation.search failed for profile %s: %s",
|
|
130
130
|
profile_id, exc,
|
|
131
131
|
)
|
|
@@ -35,10 +35,34 @@ _TEMPORAL_WORDS: frozenset[str] = frozenset({
|
|
|
35
35
|
})
|
|
36
36
|
|
|
37
37
|
_MULTI_HOP_PHRASES: tuple[str, ...] = (
|
|
38
|
+
# Original 8 phrases
|
|
38
39
|
"and then", "after that", "because", "how did",
|
|
39
40
|
"as a result", "led to", "connection between", "relationship between",
|
|
41
|
+
# V3.3.19: LoCoMo-style multi-hop patterns (causal/temporal chains)
|
|
42
|
+
"what happened when", "what was happening",
|
|
43
|
+
"during the time", "at the same time",
|
|
44
|
+
"how did it affect", "what changed after",
|
|
45
|
+
"what did they do after", "what did they do before",
|
|
46
|
+
"what was the result", "what was the outcome",
|
|
47
|
+
"what was the reason", "why did they",
|
|
48
|
+
"in response to", "as a consequence",
|
|
49
|
+
"prior to", "following that", "subsequent to",
|
|
50
|
+
"in the meantime", "at that point",
|
|
51
|
+
"which led to", "which caused", "which resulted in",
|
|
40
52
|
)
|
|
41
53
|
|
|
54
|
+
# Words that signal causal/temporal chain when combined with 2+ entities.
|
|
55
|
+
# Excludes common instruction verbs (tell, help) to avoid false positives
|
|
56
|
+
# on queries like "Tell me about Alice and Bob".
|
|
57
|
+
_CAUSAL_TEMPORAL_WORDS: frozenset[str] = frozenset({
|
|
58
|
+
"before", "after", "when", "while", "because", "then",
|
|
59
|
+
"during", "since", "until", "once",
|
|
60
|
+
"affect", "cause", "change", "happen", "result",
|
|
61
|
+
"influence", "impact", "lead", "meet",
|
|
62
|
+
"start", "stop", "begin", "end", "move", "leave",
|
|
63
|
+
"join", "visit", "return",
|
|
64
|
+
})
|
|
65
|
+
|
|
42
66
|
_AGGREGATION_WORDS: frozenset[str] = frozenset({
|
|
43
67
|
"all", "list", "every", "everything", "various", "different",
|
|
44
68
|
"many", "several", "multiple", "summarize", "overview",
|
|
@@ -80,22 +104,30 @@ class QueryStrategyClassifier:
|
|
|
80
104
|
# Strip punctuation from words so "january?" matches "january"
|
|
81
105
|
words = set(re.sub(r"[^\w\s'-]", "", q).split())
|
|
82
106
|
|
|
83
|
-
# Check multi_hop
|
|
84
|
-
# must not be short-circuited by the word "between" in _TEMPORAL_WORDS.
|
|
107
|
+
# Check multi_hop phrases FIRST (exact phrase match)
|
|
85
108
|
if any(p in q for p in _MULTI_HOP_PHRASES):
|
|
86
109
|
return "multi_hop"
|
|
110
|
+
|
|
111
|
+
# Extract proper nouns EARLY for the multi-entity heuristic
|
|
112
|
+
_SENTENCE_STARTERS = {"What", "Where", "Who", "Which", "How", "When",
|
|
113
|
+
"Does", "Did", "Can", "Could", "Would", "Should",
|
|
114
|
+
"Are", "Is", "Was", "Were", "Has", "Have", "The", "Tell"}
|
|
115
|
+
proper_nouns = [m for m in re.findall(r"\b[A-Z][a-z]{1,}\b", query)
|
|
116
|
+
if m not in _SENTENCE_STARTERS]
|
|
117
|
+
|
|
118
|
+
# V3.3.19: 2+ entities + causal/temporal word → multi_hop
|
|
119
|
+
# This MUST fire BEFORE the temporal check, otherwise "What did
|
|
120
|
+
# Alice study before moving to New York?" would classify as
|
|
121
|
+
# "temporal" instead of "multi_hop".
|
|
122
|
+
if len(proper_nouns) >= 2 and words & _CAUSAL_TEMPORAL_WORDS:
|
|
123
|
+
return "multi_hop"
|
|
124
|
+
|
|
87
125
|
if words & _TEMPORAL_WORDS:
|
|
88
126
|
return "temporal"
|
|
89
127
|
if words & _AGGREGATION_WORDS:
|
|
90
128
|
return "aggregation"
|
|
91
129
|
if any(w in q for w in _OPINION_WORDS):
|
|
92
130
|
return "opinion"
|
|
93
|
-
# Proper nouns — exclude common sentence-initial words
|
|
94
|
-
_SENTENCE_STARTERS = {"What", "Where", "Who", "Which", "How", "When",
|
|
95
|
-
"Does", "Did", "Can", "Could", "Would", "Should",
|
|
96
|
-
"Are", "Is", "Was", "Were", "Has", "Have", "The", "Tell"}
|
|
97
|
-
proper_nouns = [m for m in re.findall(r"\b[A-Z][a-z]{1,}\b", query)
|
|
98
|
-
if m not in _SENTENCE_STARTERS]
|
|
99
131
|
if len(proper_nouns) >= 2:
|
|
100
132
|
return "entity"
|
|
101
133
|
if q.startswith(("what ", "where ", "who ", "which ", "how ")):
|