npm - superlocalmemory - Versions diffs - 3.3.18 → 3.3.19 - Mend

superlocalmemory 3.3.18 → 3.3.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

package/package.json +1 -1
package/pyproject.toml +1 -1
package/scripts/postinstall.js +45 -19
package/src/superlocalmemory/cli/commands.py +27 -3
package/src/superlocalmemory/cli/main.py +13 -1
package/src/superlocalmemory/cli/setup_wizard.py +403 -45
package/src/superlocalmemory/core/config.py +1 -1
package/src/superlocalmemory/core/embeddings.py +10 -3
package/src/superlocalmemory/core/engine_wiring.py +44 -0
package/src/superlocalmemory/mcp/server.py +60 -7
package/src/superlocalmemory/mcp/tools_core.py +14 -6
package/src/superlocalmemory/retrieval/bridge_discovery.py +5 -1
package/src/superlocalmemory/retrieval/engine.py +4 -3
package/src/superlocalmemory/retrieval/entity_channel.py +2 -2
package/src/superlocalmemory/retrieval/quantization_aware_search.py +10 -2
package/src/superlocalmemory/retrieval/semantic_channel.py +23 -5
package/src/superlocalmemory/retrieval/spreading_activation.py +2 -2
package/src/superlocalmemory/retrieval/strategy.py +40 -8

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "superlocalmemory",
-  "version": "3.3.18",
+  "version": "3.3.19",
   "description": "Information-geometric agent memory with mathematical guarantees. 4-channel retrieval, Fisher-Rao similarity, zero-LLM mode, EU AI Act compliant. Works with Claude, Cursor, Windsurf, and 17+ AI tools.",
   "keywords": [
     "ai-memory",

package/pyproject.toml CHANGED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "superlocalmemory"
-version = "3.3.18"
+version = "3.3.19"
 description = "Information-geometric agent memory with mathematical guarantees"
 readme = "README.md"
 license = {text = "MIT"}

package/scripts/postinstall.js CHANGED Viewed

@@ -240,28 +240,54 @@ if (fs.existsSync(hooksDisabledFile)) {
     }
 }
+// --- Step 6: Run interactive setup wizard ---
+// Downloads embedding + reranker models, configures mode, verifies installation.
+// If TTY is available (interactive terminal), runs the full wizard.
+// If not (CI, piped), the wizard runs without prompts using defaults (Mode A).
+console.log('\n════════════════════════════════════════════════════════════');
+console.log('  Running setup wizard (model download + verification)...');
+console.log('════════════════════════════════════════════════════════════\n');
+const isTTY = process.stdin.isTTY && process.stdout.isTTY;
+const setupArgs = isTTY ? ['setup'] : ['setup'];
+const setupEnv = {
+    ...process.env,
+    PATH: '/opt/homebrew/bin:/usr/local/bin:/usr/bin:' + (process.env.PATH || ''),
+    PYTHONPATH: path.join(__dirname, '..', 'src') + ':' + (process.env.PYTHONPATH || ''),
+    CUDA_VISIBLE_DEVICES: '',
+    TOKENIZERS_PARALLELISM: 'false',
+    TORCH_DEVICE: 'cpu',
+};
+// Non-interactive: set env flag so wizard uses defaults
+if (!isTTY) {
+    setupEnv.SLM_NON_INTERACTIVE = '1';
+}
+const setupResult = spawnSync(pythonParts[0], [
+    ...pythonParts.slice(1), '-m', 'superlocalmemory.cli.main', ...setupArgs,
+], {
+    stdio: 'inherit',  // Show all output including download progress
+    timeout: 900000,    // 15 min (model downloads can be slow)
+    env: setupEnv,
+});
+if (setupResult.status === 0) {
+    console.log('✓ Setup wizard completed successfully');
+} else {
+    console.log('⚠ Setup wizard had issues (run: slm setup)');
+    console.log('  SuperLocalMemory will still work — models download on first use.');
+}
 // --- Done ---
-console.log('════════════════════════════════════════════════════════════');
-console.log('  ✓ SuperLocalMemory V3 installed successfully!');
+console.log('\n════════════════════════════════════════════════════════════');
+console.log('  ✓ SuperLocalMemory V3 installed!');
 console.log('');
 console.log('  Quick start:');
-console.log('    Just open Claude Code — memory works automatically!');
-console.log('');
-console.log('  Other commands:');
-console.log('    slm doctor         # Pre-flight check (verify everything works)');
-console.log('    slm warmup         # Pre-download embedding model (~500MB)');
-console.log('    slm remember "..." # Store a memory');
-console.log('    slm recall "..."   # Search memories');
-console.log('    slm dashboard      # Open 17-tab web dashboard');
-console.log('    slm hooks status   # Check hook installation');
-console.log('    slm hooks remove   # Opt out of auto-memory hooks');
-console.log('');
-console.log('  Prerequisites satisfied:');
-console.log('    ✓ Python 3.11+');
-console.log('    ✓ Core math & search libraries');
-console.log('    ✓ Dashboard server (fastapi, uvicorn)');
-console.log('    ✓ Learning engine (lightgbm)');
-console.log('    ✓ Data directory (~/.superlocalmemory/)');
+console.log('    slm remember "..."   # Store a memory');
+console.log('    slm recall "..."     # Search memories');
+console.log('    slm dashboard        # Open web dashboard');
+console.log('    slm setup            # Re-run setup wizard');
 console.log('');
 console.log('  Docs: https://github.com/qualixar/superlocalmemory/wiki');
 console.log('════════════════════════════════════════════════════════════\n');

package/src/superlocalmemory/cli/commands.py CHANGED Viewed

@@ -68,11 +68,12 @@ def dispatch(args: Namespace) -> None:
 # -- Setup & Config (no --json — interactive commands) ---------------------
-def cmd_setup(_args: Namespace) -> None:
+def cmd_setup(args: Namespace) -> None:
     """Run the interactive setup wizard."""
     from superlocalmemory.cli.setup_wizard import run_wizard
-    run_wizard()
+    run_wizard(auto=getattr(args, "auto", False))
+    sys.exit(0)  # Force clean exit (background threads from imports may linger)
 def cmd_mode(args: Namespace) -> None:
@@ -249,9 +250,32 @@ def cmd_list(args: Namespace) -> None:
 def cmd_remember(args: Namespace) -> None:
     """Store a memory via the engine."""
     from superlocalmemory.core.config import SLMConfig
-    from superlocalmemory.core.engine import MemoryEngine
     use_json = getattr(args, 'json', False)
+    fire_and_forget = getattr(args, 'fire_and_forget', False)
+    # V3.3.19: --async flag for hooks/scripts — spawn background process, return instantly
+    if fire_and_forget:
+        import subprocess
+        cmd = [sys.executable, "-m", "superlocalmemory.cli.main", "remember", args.content]
+        if args.tags:
+            cmd.extend(["--tags", args.tags])
+        if use_json:
+            cmd.append("--json")
+        # Spawn detached subprocess — parent exits immediately
+        subprocess.Popen(
+            cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
+            start_new_session=True,
+        )
+        if use_json:
+            from superlocalmemory.cli.json_output import json_print
+            json_print("remember", data={"queued": True, "async": True})
+        else:
+            print("Queued for background processing.")
+        return
+    from superlocalmemory.core.engine import MemoryEngine
     try:
         config = SLMConfig.load()
         engine = MemoryEngine(config)

package/src/superlocalmemory/cli/main.py CHANGED Viewed

@@ -100,7 +100,11 @@ def main() -> None:
         help="Enable PreToolUse gate (experimental — blocks tools until session_init)",
     )
-    sub.add_parser("setup", help="Interactive first-time setup wizard")
+    setup_p = sub.add_parser("setup", help="Interactive first-time setup wizard")
+    setup_p.add_argument(
+        "--auto", action="store_true",
+        help="Non-interactive mode: use defaults (for CI/scripts)",
+    )
     mode_p = sub.add_parser("mode", help="Get or set operating mode (a/b/c)")
     mode_p.add_argument(
@@ -130,6 +134,10 @@ def main() -> None:
     remember_p.add_argument("content", help="Content to remember")
     remember_p.add_argument("--tags", default="", help="Comma-separated tags")
     remember_p.add_argument("--json", action="store_true", help="Output structured JSON (agent-native)")
+    remember_p.add_argument(
+        "--async", dest="fire_and_forget", action="store_true",
+        help="Return immediately, process in background (for hooks/scripts)",
+    )
     recall_p = sub.add_parser("recall", help="Semantic search with 4-channel retrieval")
     recall_p.add_argument("query", help="Search query")
@@ -262,6 +270,10 @@ def main() -> None:
         parser.print_help()
         sys.exit(0)
+    # V3.3.19: Auto-trigger setup wizard on first use
+    from superlocalmemory.cli.setup_wizard import check_first_use
+    check_first_use(args.command)
     from superlocalmemory.cli.commands import dispatch
     dispatch(args)

package/src/superlocalmemory/cli/setup_wizard.py CHANGED Viewed

@@ -4,7 +4,11 @@
 """Interactive setup wizard for first-time configuration.
-Guides new users through mode selection and provider setup.
+Runs automatically on first use of any `slm` command, or via `slm setup`.
+Downloads models, configures mode, verifies installation.
+For npm: triggered by postinstall.js after dependency installation.
+For pip: triggered on first `slm` command when .setup-complete is missing.
 Part of Qualixar | Author: Varun Pratap Bhardwaj
 """
@@ -12,98 +16,453 @@ Part of Qualixar | Author: Varun Pratap Bhardwaj
 from __future__ import annotations
 import os
+import platform
 import shutil
+import subprocess
+import sys
+import time
+from pathlib import Path
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+_SLM_HOME = Path(os.environ.get("SL_MEMORY_PATH", Path.home() / ".superlocalmemory"))
+_SETUP_MARKER = _SLM_HOME / ".setup-complete"
+_EMBED_MODEL = "nomic-ai/nomic-embed-text-v1.5"
+_RERANKER_MODEL = "cross-encoder/ms-marco-MiniLM-L-12-v2"
+# ---------------------------------------------------------------------------
+# Detection helpers
+# ---------------------------------------------------------------------------
+def is_interactive() -> bool:
+    """True if running in a terminal (not CI, not piped, not MCP)."""
+    if os.environ.get("CI"):
+        return False
+    if os.environ.get("SLM_NON_INTERACTIVE"):
+        return False
+    return sys.stdin.isatty() and sys.stdout.isatty()
+def is_setup_complete() -> bool:
+    """True if the setup wizard has been run at least once."""
+    return _SETUP_MARKER.exists()
+def needs_setup() -> bool:
+    """True if setup should auto-trigger (first use)."""
+    return not is_setup_complete()
+def _prompt(message: str, default: str = "") -> str:
+    """Prompt user for input. Returns default if non-interactive."""
+    if not is_interactive():
+        return default
+    try:
+        return input(message).strip() or default
+    except (EOFError, KeyboardInterrupt):
+        print()
+        return default
+def _get_ram_gb() -> float:
+    """Get total system RAM in GB."""
+    try:
+        import psutil
+        return psutil.virtual_memory().total / (1024 ** 3)
+    except ImportError:
+        pass
+    # Fallback: macOS
+    if platform.system() == "Darwin":
+        try:
+            out = subprocess.check_output(["sysctl", "-n", "hw.memsize"], text=True)
+            return int(out.strip()) / (1024 ** 3)
+        except Exception:
+            pass
+    # Fallback: Linux
+    try:
+        with open("/proc/meminfo") as f:
+            for line in f:
+                if line.startswith("MemTotal:"):
+                    return int(line.split()[1]) / (1024 ** 2)
+    except Exception:
+        pass
+    return 0.0
+# ---------------------------------------------------------------------------
+# Model download
+# ---------------------------------------------------------------------------
+def _download_model(model_name: str, label: str) -> bool:
+    """Download a HuggingFace model with visible progress.
+    Runs in a subprocess so the main process never loads torch.
+    stderr is inherited so the user sees download progress bars.
+    Returns True on success.
+    """
+    print(f"\n  Downloading {label}: {model_name}")
+    print(f"  (this may take a few minutes on first run)\n")
+    script = (
+        f"import sys; "
+        f"from sentence_transformers import SentenceTransformer; "
+        f"m = SentenceTransformer('{model_name}', trust_remote_code=True); "
+        f"d = m.get_sentence_embedding_dimension(); "
+        f"print(f'OK dim={{d}}'); "
+    )
+    try:
+        result = subprocess.run(
+            [sys.executable, "-c", script],
+            timeout=600,  # 10 min for large model downloads
+            capture_output=False,  # Show download progress
+            text=True,
+            env={
+                **os.environ,
+                "CUDA_VISIBLE_DEVICES": "",
+                "TOKENIZERS_PARALLELISM": "false",
+                "TORCH_DEVICE": "cpu",
+            },
+        )
+        if result.returncode == 0:
+            print(f"  ✓ {label} ready")
+            return True
+        print(f"  ✗ {label} download failed (exit code {result.returncode})")
+        return False
+    except subprocess.TimeoutExpired:
+        print(f"  ✗ {label} download timed out (10 min)")
+        return False
+    except FileNotFoundError:
+        print(f"  ✗ Python not found: {sys.executable}")
+        return False
+    except Exception as exc:
+        print(f"  ✗ {label} download error: {exc}")
+        return False
+def _download_reranker(model_name: str) -> bool:
+    """Download cross-encoder reranker model."""
+    print(f"\n  Downloading reranker: {model_name}")
+    print(f"  (cross-encoder for result re-ranking)\n")
+    script = (
+        f"from sentence_transformers import CrossEncoder; "
+        f"m = CrossEncoder('{model_name}', trust_remote_code=True); "
+        f"print('OK'); "
+    )
+    try:
+        result = subprocess.run(
+            [sys.executable, "-c", script],
+            timeout=300,
+            capture_output=False,
+            text=True,
+            env={
+                **os.environ,
+                "CUDA_VISIBLE_DEVICES": "",
+                "TOKENIZERS_PARALLELISM": "false",
+                "TORCH_DEVICE": "cpu",
+            },
+        )
+        if result.returncode == 0:
+            print(f"  ✓ Reranker ready")
+            return True
+        print(f"  ✗ Reranker download failed")
+        return False
+    except Exception as exc:
+        print(f"  ✗ Reranker error: {exc}")
+        return False
+# ---------------------------------------------------------------------------
+# Verification
+# ---------------------------------------------------------------------------
+def _verify_installation() -> bool:
+    """Quick smoke test: embed a sentence, verify dimension."""
+    print("\n  Running verification test...")
+    script = (
+        "from superlocalmemory.core.embeddings import EmbeddingService; "
+        "from superlocalmemory.core.config import EmbeddingConfig; "
+        "cfg = EmbeddingConfig(); "
+        "svc = EmbeddingService(cfg); "
+        "vec = svc.embed('SuperLocalMemory setup verification test'); "
+        "print(f'OK dim={len(vec)}' if vec else 'FAIL'); "
+        "svc.unload(); "
+    )
+    try:
+        result = subprocess.run(
+            [sys.executable, "-c", script],
+            timeout=120,
+            capture_output=True,
+            text=True,
+            env={
+                **os.environ,
+                "CUDA_VISIBLE_DEVICES": "",
+                "TOKENIZERS_PARALLELISM": "false",
+                "TORCH_DEVICE": "cpu",
+            },
+        )
+        stdout = result.stdout.strip()
+        if "OK dim=" in stdout:
+            dim = stdout.split("dim=")[1]
+            print(f"  ✓ Embedding verified (dimension={dim})")
+            return True
+        print(f"  ✗ Verification failed: {stdout}")
+        if result.stderr:
+            # Show last 3 lines of stderr for diagnosis
+            lines = result.stderr.strip().split("\n")
+            for line in lines[-3:]:
+                print(f"    {line}")
+        return False
+    except subprocess.TimeoutExpired:
+        print("  ✗ Verification timed out (120s)")
+        return False
+    except Exception as exc:
+        print(f"  ✗ Verification error: {exc}")
+        return False
+# ---------------------------------------------------------------------------
+# Mark setup complete
+# ---------------------------------------------------------------------------
+def _mark_complete() -> None:
+    """Write .setup-complete marker file."""
+    _SLM_HOME.mkdir(parents=True, exist_ok=True)
+    _SETUP_MARKER.write_text(
+        f"setup_completed={time.strftime('%Y-%m-%dT%H:%M:%S')}\n"
+        f"python={sys.executable}\n"
+        f"platform={platform.system()}\n"
+        f"version={platform.python_version()}\n"
+    )
+# ---------------------------------------------------------------------------
+# Main wizard
+# ---------------------------------------------------------------------------
+def run_wizard(auto: bool = False) -> None:
+    """Run the interactive setup wizard.
+    Args:
+        auto: If True, use defaults without prompting (for npm postinstall
+              or CI environments).
+    """
+    interactive = is_interactive() and not auto
-def run_wizard() -> None:
-    """Run the interactive setup wizard."""
     print()
-    print("SuperLocalMemory V3 — First Time Setup")
-    print("=" * 40)
+    print("╔══════════════════════════════════════════════════════════╗")
+    print("║  SuperLocalMemory V3 — Setup Wizard                    ║")
+    print("║  by Varun Pratap Bhardwaj / Qualixar                   ║")
+    print("╚══════════════════════════════════════════════════════════╝")
+    print()
+    # -- Step 1: System check --
+    print("─── Step 1/5: System Check ───")
+    print()
+    py_ver = platform.python_version()
+    py_ok = sys.version_info >= (3, 11)
+    ram_gb = _get_ram_gb()
+    print(f"  Python:   {py_ver} {'✓' if py_ok else '✗ (3.11+ required)'}")
+    print(f"  Platform: {platform.system()} {platform.machine()}")
+    if ram_gb > 0:
+        print(f"  RAM:      {ram_gb:.1f} GB {'✓' if ram_gb >= 4 else '⚠ (4GB+ recommended)'}")
+    print(f"  Data dir: {_SLM_HOME}")
+    # Check sentence-transformers
+    st_ok = False
+    try:
+        import sentence_transformers  # noqa: F401
+        st_ok = True
+        print(f"  sentence-transformers: ✓")
+    except ImportError:
+        print(f"  sentence-transformers: ✗ (not installed)")
+        print(f"    Run: pip install 'sentence-transformers>=4.0.0'")
+    if not py_ok:
+        print("\n  ✗ Python 3.11+ is required. Please upgrade Python.")
+        print("    https://python.org/downloads/")
+        return
+    # -- Step 2: Mode selection --
     print()
-    print("Choose your operating mode:")
+    print("─── Step 2/5: Choose Operating Mode ───")
     print()
-    print("  [A] Local Guardian (default)")
-    print("      Zero cloud. Zero LLM. Your data never leaves your machine.")
+    print("  [A] Local Guardian (recommended)")
+    print("      Zero cloud. Zero LLM. Full privacy.")
     print("      EU AI Act compliant. Works immediately.")
     print()
     print("  [B] Smart Local")
-    print("      Local LLM via Ollama for answer synthesis.")
-    print("      Still private — nothing leaves your machine.")
+    print("      Local LLM via Ollama for enrichment.")
+    print("      Data stays on your machine.")
     print()
     print("  [C] Full Power")
-    print("      Cloud LLM for best accuracy (~78% on LoCoMo).")
-    print("      Requires: API key from a supported provider.")
+    print("      Cloud LLM for maximum accuracy.")
+    print("      Requires API key.")
     print()
-    choice = input("Select mode [A/B/C] (default: A): ").strip().lower() or "a"
+    if interactive:
+        choice = _prompt("  Select mode [A/B/C] (default: A): ", "a").lower()
+    else:
+        choice = "a"
+        print("  Auto-selecting Mode A (non-interactive)")
     if choice not in ("a", "b", "c"):
-        print(f"Invalid choice: {choice}. Using Mode A.")
+        print(f"  Invalid choice '{choice}', using Mode A.")
         choice = "a"
     from superlocalmemory.core.config import SLMConfig
     from superlocalmemory.storage.models import Mode
-    if choice == "a":
-        config = SLMConfig.for_mode(Mode.A)
-        config.save()
-        print()
-        print("Mode A configured. Zero cloud, zero LLM.")
-        print(f"Config saved to: {config.base_dir / 'config.json'}")
+    mode_map = {"a": Mode.A, "b": Mode.B, "c": Mode.C}
+    config = SLMConfig.for_mode(mode_map[choice])
-    elif choice == "b":
-        config = SLMConfig.for_mode(Mode.B)
+    if choice == "b":
         print()
-        print("Checking for Ollama...")
         if shutil.which("ollama"):
-            print("  Ollama found!")
+            print("  ✓ Ollama found!")
         else:
-            print("  Ollama not found. Install it from https://ollama.ai")
-            print("  After installing, run: ollama pull llama3.2")
-        config.save()
-        print(f"Config saved to: {config.base_dir / 'config.json'}")
+            print("  ⚠ Ollama not found. Install: https://ollama.ai")
+            print("    After installing: ollama pull llama3.2")
-    elif choice == "c":
-        config = SLMConfig.for_mode(Mode.C)
+    if choice == "c" and interactive:
         configure_provider(config)
+    else:
+        config.save()
+    mode_names = {"a": "Local Guardian", "b": "Smart Local", "c": "Full Power"}
+    print(f"\n  ✓ Mode {choice.upper()} ({mode_names[choice]}) configured")
+    # -- Step 3: Download embedding model --
     print()
-    print("Ready! Your AI now remembers you.")
+    print("─── Step 3/5: Download Embedding Model ───")
+    if not st_ok:
+        print("  ⚠ Skipped (sentence-transformers not installed)")
+        print("    Models will download on first use.")
+    else:
+        embed_ok = _download_model(_EMBED_MODEL, "Embedding model")
+        if not embed_ok:
+            print("  ⚠ Model will download on first use (may take a few minutes)")
+    # -- Step 4: Download reranker model --
     print()
+    print("─── Step 4/5: Download Reranker Model ───")
+    if not st_ok:
+        print("  ⚠ Skipped (sentence-transformers not installed)")
+    else:
+        _download_reranker(_RERANKER_MODEL)
-def configure_provider(config: object) -> None:
-    """Configure LLM provider for Mode C.
+    # -- Step 5: Verification --
+    print()
+    print("─── Step 5/5: Verification ───")
-    Args:
-        config: An SLMConfig instance (typed as object to avoid circular import
-                at module level; actual type checked at runtime).
+    if st_ok:
+        verified = _verify_installation()
+    else:
+        print("  ⚠ Skipped (sentence-transformers not installed)")
+        verified = False
+    # -- Done --
+    _mark_complete()
+    print()
+    print("╔══════════════════════════════════════════════════════════╗")
+    if verified:
+        print("║  ✓ Setup Complete — SuperLocalMemory is ready!         ║")
+    else:
+        print("║  ✓ Setup Complete — basic config saved                 ║")
+        print("║    Models will auto-download on first use              ║")
+    print("╚══════════════════════════════════════════════════════════╝")
+    print()
+    print("  Quick start:")
+    print('    slm remember "your first memory"')
+    print('    slm recall "search query"')
+    print("    slm dashboard")
+    print()
+    print("  Need help?")
+    print("    slm doctor     — diagnose issues")
+    print("    slm --help     — all commands")
+    print("    https://github.com/qualixar/superlocalmemory")
+    print()
+# ---------------------------------------------------------------------------
+# First-use auto-trigger
+# ---------------------------------------------------------------------------
+def check_first_use(command: str) -> None:
+    """Check if setup is needed before running a command.
+    Called from main.py before dispatching any command.
+    Skips commands that work without setup (setup, init, hook, hooks, reap, mcp).
     """
+    # Commands that work without setup
+    _SKIP_COMMANDS = {"setup", "init", "hook", "hooks", "reap", "mcp"}
+    if command in _SKIP_COMMANDS:
+        return
+    if is_setup_complete():
+        return
+    # Non-interactive: use defaults silently, don't block the command
+    if not is_interactive():
+        # Just create config with defaults and mark complete
+        try:
+            from superlocalmemory.core.config import SLMConfig
+            from superlocalmemory.storage.models import Mode
+            config = SLMConfig.for_mode(Mode.A)
+            config.save()
+            _mark_complete()
+        except Exception:
+            pass
+        return
+    # Interactive: run the full wizard
+    print()
+    print("  First time using SuperLocalMemory!")
+    print("  Running setup wizard...\n")
+    run_wizard()
+# ---------------------------------------------------------------------------
+# Mode C provider config (preserved from original)
+# ---------------------------------------------------------------------------
+def configure_provider(config: object) -> None:
+    """Configure LLM provider for Mode C."""
     from superlocalmemory.core.config import SLMConfig
     from superlocalmemory.storage.models import Mode
     presets = SLMConfig.provider_presets()
     print()
-    print("Choose your LLM provider:")
+    print("  Choose your LLM provider:")
     print()
     providers = list(presets.keys())
     for i, name in enumerate(providers, 1):
         preset = presets[name]
-        print(f"  [{i}] {name.capitalize()} — {preset['model']}")
+        print(f"    [{i}] {name.capitalize()} — {preset['model']}")
     print()
-    idx = input(f"Select provider [1-{len(providers)}]: ").strip()
+    idx = _prompt(f"  Select provider [1-{len(providers)}]: ", "1")
     try:
         provider_name = providers[int(idx) - 1]
     except (ValueError, IndexError):
-        print("Invalid choice. Using OpenAI.")
+        print("  Invalid choice. Using OpenAI.")
         provider_name = "openai"
     preset = presets[provider_name]
-    # Resolve API key from environment or prompt
+    # Resolve API key
     env_key = preset.get("env_key", "")
     api_key = ""
     if env_key:
@@ -111,10 +470,10 @@ def configure_provider(config: object) -> None:
         if existing:
             print(f"  Found {env_key} in environment.")
             api_key = existing
-        else:
-            api_key = input(
+        elif is_interactive():
+            api_key = _prompt(
                 f"  Enter your {provider_name.capitalize()} API key: ",
-            ).strip()
+            )
     updated = SLMConfig.for_mode(
         Mode.C,
@@ -126,4 +485,3 @@ def configure_provider(config: object) -> None:
     updated.save()
     print(f"  Provider: {provider_name}")
     print(f"  Model: {preset['model']}")
-    print(f"Config saved to: {updated.base_dir / 'config.json'}")

package/src/superlocalmemory/core/config.py CHANGED Viewed

@@ -619,7 +619,7 @@ class SLMConfig:
             # The user's explicit choice always wins.
             if "cross_encoder_backend" not in rt:
                 rt.setdefault("cross_encoder_model", "cross-encoder/ms-marco-MiniLM-L-12-v2")
-                rt["cross_encoder_backend"] = "onnx"
+                rt["cross_encoder_backend"] = ""  # V3.3.18: PyTorch (ONNX CoreML leaks on ARM64)
                 # Only auto-enable if user didn't explicitly set the field
                 rt.setdefault("use_cross_encoder", True)
             config.retrieval = RetrievalConfig(**{

package/src/superlocalmemory/core/embeddings.py CHANGED Viewed

@@ -191,11 +191,18 @@ class EmbeddingService:
                 )
                 if not resp_line:
                     logger.warning(
-                        "Embedding worker timed out after %ds. On first run, model "
-                        "download can take several minutes. Run 'slm doctor' to "
-                        "diagnose or 'slm warmup' to pre-download the model.",
+                        "Embedding worker timed out after %ds. "
+                        "Run 'slm setup' to download models and verify installation.",
                         _SUBPROCESS_RESPONSE_TIMEOUT,
                     )
+                    # Print to stderr so CLI users see this even without logging
+                    print(
+                        f"\n⚠ Embedding worker did not respond within "
+                        f"{_SUBPROCESS_RESPONSE_TIMEOUT}s.\n"
+                        f"  Run: slm setup   (download models + verify)\n"
+                        f"  Run: slm doctor  (diagnose issues)\n",
+                        file=sys.stderr,
+                    )
                     self._kill_worker()
                     return None
                 resp = json.loads(resp_line)

package/src/superlocalmemory/core/engine_wiring.py CHANGED Viewed

@@ -394,6 +394,46 @@ def _init_hopfield_channel(
         return None
+def _init_quantization_aware_search(
+    vector_store: Any,
+    db: DatabaseManager,
+    config: SLMConfig,
+) -> Any | None:
+    """Create QuantizationAwareSearch if quantized store is available.
+    Returns None on failure — SemanticChannel falls back to VectorStore KNN.
+    Stateless wrapper: zero memory overhead, no workers, no threads.
+    """
+    if vector_store is None:
+        return None
+    try:
+        from superlocalmemory.retrieval.quantization_aware_search import QuantizationAwareSearch
+        from superlocalmemory.storage.quantized_store import QuantizedEmbeddingStore
+        from superlocalmemory.math.polar_quant import PolarQuantEncoder
+        from superlocalmemory.math.qjl import QJLEncoder
+        polar = PolarQuantEncoder(dimension=config.embedding.dimension)
+        qjl: QJLEncoder | None = None
+        try:
+            qjl = QJLEncoder(dimension=config.embedding.dimension)
+        except Exception:
+            pass  # QJL is optional (HR-07)
+        q_store = QuantizedEmbeddingStore(
+            db=db, polar=polar, qjl=qjl, config=config.quantization,
+        )
+        qas = QuantizationAwareSearch(
+            vector_store=vector_store,
+            quantized_store=q_store,
+            config=config.quantization,
+        )
+        logger.info("QuantizationAwareSearch initialized (TurboQuant 3-tier search)")
+        return qas
+    except Exception as exc:
+        logger.debug("QuantizationAwareSearch init failed (non-fatal): %s", exc)
+        return None
 def init_retrieval(
     config: SLMConfig,
     db: DatabaseManager,
@@ -412,6 +452,9 @@ def init_retrieval(
     from superlocalmemory.retrieval.profile_channel import ProfileChannel
     from superlocalmemory.retrieval.bridge_discovery import BridgeDiscovery
+    # V3.3.19: TurboQuant 3-tier search (stateless, zero memory overhead)
+    qas = _init_quantization_aware_search(vector_store, db, config)
     channels: dict = {
         "semantic": SemanticChannel(
             db,
@@ -419,6 +462,7 @@ def init_retrieval(
             embedder=embedder,
             fisher_mode=config.math.fisher_mode,
             vector_store=vector_store,
+            quantization_aware_search=qas,
         ),
         "bm25": BM25Channel(db),
         "entity_graph": EntityGraphChannel(db, entity_resolver),

package/src/superlocalmemory/mcp/server.py CHANGED Viewed

@@ -53,7 +53,60 @@ def reset_engine():
     _engine = None
-# Register all tools and resources --------------------------------------------
+# Register tools and resources -------------------------------------------------
+#
+# V3.3.19: Trimmed from 38 tools to 17 essential tools.
+# IDEs cap at 50-100 tools total (Cursor, Antigravity, Windsurf).
+# 38 tools from SLM alone crowd out other MCP servers.
+#
+# Essential 17: the tools an AI agent actually needs during a session.
+# Admin/diagnostics tools remain available via CLI (`slm <command>`).
+# Set SLM_MCP_ALL_TOOLS=1 to enable all 38 tools (power users).
+import os as _os_reg
+_ESSENTIAL_TOOLS: frozenset[str] = frozenset({
+    # Core memory operations (8)
+    "remember", "recall", "search", "fetch",
+    "list_recent", "delete_memory", "update_memory", "get_status",
+    # Session lifecycle (3)
+    "session_init", "observe", "close_session",
+    # Memory management (2)
+    "forget", "run_maintenance",
+    # Infinite memory + learning (4)
+    "consolidate_cognitive", "get_soft_prompts",
+    "set_mode", "report_outcome",
+})
+_all_tools = _os_reg.environ.get("SLM_MCP_ALL_TOOLS") == "1"
+class _FilteredServer:
+    """Wraps FastMCP to only register essential tools.
+    Non-essential tools are silently skipped (not registered on the MCP
+    server). They remain available via CLI. When SLM_MCP_ALL_TOOLS=1,
+    all tools are registered (bypass filter).
+    """
+    __slots__ = ("_server", "_allowed")
+    def __init__(self, real_server: FastMCP, allowed: frozenset[str]) -> None:
+        self._server = real_server
+        self._allowed = allowed
+    def tool(self, *args, **kwargs):
+        def decorator(func):
+            if func.__name__ in self._allowed:
+                return self._server.tool(*args, **kwargs)(func)
+            return func  # Skip registration — still importable, just not MCP-visible
+        return decorator
+    def __getattr__(self, name):
+        return getattr(self._server, name)
+# Choose full or filtered registration target
+_target = server if _all_tools else _FilteredServer(server, _ESSENTIAL_TOOLS)
 from superlocalmemory.mcp.tools_core import register_core_tools
 from superlocalmemory.mcp.tools_v28 import register_v28_tools
@@ -62,12 +115,12 @@ from superlocalmemory.mcp.tools_active import register_active_tools
 from superlocalmemory.mcp.tools_v33 import register_v33_tools
 from superlocalmemory.mcp.resources import register_resources
-register_core_tools(server, get_engine)
-register_v28_tools(server, get_engine)
-register_v3_tools(server, get_engine)
-register_active_tools(server, get_engine)
-register_v33_tools(server, get_engine)
-register_resources(server, get_engine)
+register_core_tools(_target, get_engine)
+register_v28_tools(_target, get_engine)
+register_v3_tools(_target, get_engine)
+register_active_tools(_target, get_engine)
+register_v33_tools(_target, get_engine)
+register_resources(server, get_engine)  # Resources always registered (not tools)
 if __name__ == "__main__":

package/src/superlocalmemory/mcp/tools_core.py CHANGED Viewed

@@ -95,14 +95,20 @@ def register_core_tools(server, get_engine: Callable) -> None:
         Extracts atomic facts, resolves entities, builds graph edges,
         and indexes for 4-channel retrieval.
         """
+        import asyncio
         try:
             from superlocalmemory.core.worker_pool import WorkerPool
             pool = WorkerPool.shared()
-            result = pool.store(content, metadata={
-                "tags": tags, "project": project,
-                "importance": importance, "agent_id": agent_id,
-                "session_id": session_id,
-            })
+            # V3.3.19: Run store in thread pool so it doesn't block the
+            # MCP event loop. Before this fix, every remember call blocked
+            # the IDE/agent for 11-17s in Mode B (Ollama LLM fact extraction).
+            result = await asyncio.to_thread(
+                pool.store, content, metadata={
+                    "tags": tags, "project": project,
+                    "importance": importance, "agent_id": agent_id,
+                    "session_id": session_id,
+                },
+            )
             if result.get("ok"):
                 _emit_event("memory.created", {
                     "content_preview": content[:80],
@@ -118,10 +124,12 @@ def register_core_tools(server, get_engine: Callable) -> None:
     @server.tool()
     async def recall(query: str, limit: int = 10, agent_id: str = "mcp_client") -> dict:
         """Search memories by semantic query with 4-channel retrieval, RRF fusion, and reranking."""
+        import asyncio
         try:
             from superlocalmemory.core.worker_pool import WorkerPool
             pool = WorkerPool.shared()
-            result = pool.recall(query, limit=limit)
+            # V3.3.19: Run in thread pool to avoid blocking MCP event loop
+            result = await asyncio.to_thread(pool.recall, query, limit=limit)
             if result.get("ok"):
                 # Record implicit feedback: every returned result is a recall_hit
                 try:

package/src/superlocalmemory/retrieval/bridge_discovery.py CHANGED Viewed

@@ -102,7 +102,11 @@ class BridgeDiscovery:
                 for f in entity_facts[:5]:
                     if f.fact_id not in seen:
                         seen.add(f.fact_id)
-                        bridges.append((f.fact_id, 0.7))
+                        overlap = (
+                            len(set(f.canonical_entities) & entities_a)
+                            + len(set(f.canonical_entities) & entities_b)
+                        )
+                        bridges.append((f.fact_id, min(1.0, 0.5 + overlap * 0.15)))
             if len(bridges) >= max_bridges:
                 break

package/src/superlocalmemory/retrieval/engine.py CHANGED Viewed

@@ -146,13 +146,14 @@ class RetrievalEngine:
         fused = weighted_rrf(ch_results, strat.weights, k=self._config.rrf_k)
         # Bridge discovery for multi-hop queries
+        # V3.3.19: Only bridge.discover() (86ms). Removed bridge.spreading_activation()
+        # which did per-node SQL queries across 254K edges → 78s latency.
+        # The SYNAPSE SA channel already provides proper SA with in-memory caching.
         if self._bridge is not None and strat.query_type in ("multi_hop", "entity", "factual", "general"):
             try:
                 seed_ids = [fr.fact_id for fr in fused[:10]]
                 bridges = self._bridge.discover(seed_ids, profile_id, max_bridges=10)
-                spread = self._bridge.spreading_activation(seed_ids, profile_id)
-                extra = bridges + spread
-                for fid, score in extra:
+                for fid, score in bridges:
                     if not any(fr.fact_id == fid for fr in fused):
                         fused.append(FusionResult(
                             fact_id=fid, fused_score=score * 0.8,

package/src/superlocalmemory/retrieval/entity_channel.py CHANGED Viewed

@@ -89,8 +89,8 @@ class EntityGraphChannel:
     def __init__(
         self, db: DatabaseManager,
         entity_resolver: EntityResolver | None = None,
-        decay: float = 0.7, activation_threshold: float = 0.1,
-        max_hops: int = 3,
+        decay: float = 0.7, activation_threshold: float = 0.05,
+        max_hops: int = 4,
     ) -> None:
         self._db = db
         self._resolver = entity_resolver

package/src/superlocalmemory/retrieval/quantization_aware_search.py CHANGED Viewed

@@ -98,7 +98,11 @@ class QuantizationAwareSearch:
     ) -> list[tuple[str, float]]:
         """Tier 1: float32 exact cosine via VectorStore."""
         try:
-            return self._vector_store.search(query, profile_id, top_k)
+            return self._vector_store.search(
+                query_embedding=list(query) if hasattr(query, 'tolist') else query,
+                top_k=top_k,
+                profile_id=profile_id,
+            )
         except Exception as exc:
             logger.debug("float32 search failed: %s", exc)
             return []
@@ -109,9 +113,13 @@ class QuantizationAwareSearch:
         """Tier 2: int8 approximate via VectorStore.search_int8.
         Applies 0.98x penalty to account for int8 quantization error.
+        Gracefully returns [] if VectorStore lacks search_int8 method.
         """
+        fn = getattr(self._vector_store, "search_int8", None)
+        if fn is None:
+            return []
         try:
-            raw = self._vector_store.search_int8(query, profile_id, top_k)
+            raw = fn(query, profile_id=profile_id, top_k=top_k)
             return [(fid, score * _INT8_PENALTY) for fid, score in raw]
         except Exception as exc:
             logger.debug("int8 search failed: %s", exc)

package/src/superlocalmemory/retrieval/semantic_channel.py CHANGED Viewed

@@ -84,6 +84,7 @@ class SemanticChannel:
         embedder: object | None = None,
         fisher_mode: str = "simplified",
         vector_store: Any | None = None,
+        quantization_aware_search: Any | None = None,
     ) -> None:
         self._db = db
         self._temperature = fisher_temperature
@@ -92,6 +93,8 @@ class SemanticChannel:
         # Lazily instantiated full metric (avoids import cost when not needed)
         self._full_metric: object | None = None
         self._vector_store = vector_store
+        # V3.3.19: TurboQuant 3-tier search (stateless, optional)
+        self._qas = quantization_aware_search
     def search(
         self,
@@ -137,11 +140,26 @@ class SemanticChannel:
         profile_id: str,
         top_k: int,
     ) -> list[tuple[str, float]]:
-        """KNN via VectorStore, then Fisher-Rao re-scoring on top-K subset."""
-        # Step 1: Fast KNN -- get 2x top_k candidates for Fisher re-ranking
-        knn_results = self._vector_store.search(
-            query_embedding, top_k=top_k * 2, profile_id=profile_id,
-        )
+        """KNN via VectorStore (or QAS 3-tier), then Fisher-Rao re-scoring."""
+        # V3.3.19: Try TurboQuant 3-tier search first (float32 + int8 + polar)
+        if self._qas is not None:
+            try:
+                knn_results = self._qas.search(
+                    query_embedding=q_vec, profile_id=profile_id,
+                    top_k=top_k * 2,
+                )
+            except Exception:
+                knn_results = []
+            # Fall through to VectorStore if QAS returned nothing
+            if not knn_results:
+                knn_results = self._vector_store.search(
+                    query_embedding, top_k=top_k * 2, profile_id=profile_id,
+                )
+        else:
+            # Step 1: Fast KNN -- get 2x top_k candidates for Fisher re-ranking
+            knn_results = self._vector_store.search(
+                query_embedding, top_k=top_k * 2, profile_id=profile_id,
+            )
         if not knn_results:
             return []  # Caller falls through to full scan

package/src/superlocalmemory/retrieval/spreading_activation.py CHANGED Viewed

@@ -97,7 +97,7 @@ class SpreadingActivation:
         try:
             # Step 0: Get seed nodes from VectorStore KNN
             seed_results = self._vector_store.search(
-                query, top_k=self._config.top_m,
+                query, top_k=self._config.top_m, profile_id=profile_id,
             )
             if not seed_results:
                 return []
@@ -125,7 +125,7 @@ class SpreadingActivation:
             return results[:top_k]
         except Exception as exc:
-            logger.debug(
+            logger.warning(
                 "SpreadingActivation.search failed for profile %s: %s",
                 profile_id, exc,
             )

package/src/superlocalmemory/retrieval/strategy.py CHANGED Viewed

@@ -35,10 +35,34 @@ _TEMPORAL_WORDS: frozenset[str] = frozenset({
 })
 _MULTI_HOP_PHRASES: tuple[str, ...] = (
+    # Original 8 phrases
     "and then", "after that", "because", "how did",
     "as a result", "led to", "connection between", "relationship between",
+    # V3.3.19: LoCoMo-style multi-hop patterns (causal/temporal chains)
+    "what happened when", "what was happening",
+    "during the time", "at the same time",
+    "how did it affect", "what changed after",
+    "what did they do after", "what did they do before",
+    "what was the result", "what was the outcome",
+    "what was the reason", "why did they",
+    "in response to", "as a consequence",
+    "prior to", "following that", "subsequent to",
+    "in the meantime", "at that point",
+    "which led to", "which caused", "which resulted in",
 )
+# Words that signal causal/temporal chain when combined with 2+ entities.
+# Excludes common instruction verbs (tell, help) to avoid false positives
+# on queries like "Tell me about Alice and Bob".
+_CAUSAL_TEMPORAL_WORDS: frozenset[str] = frozenset({
+    "before", "after", "when", "while", "because", "then",
+    "during", "since", "until", "once",
+    "affect", "cause", "change", "happen", "result",
+    "influence", "impact", "lead", "meet",
+    "start", "stop", "begin", "end", "move", "leave",
+    "join", "visit", "return",
+})
 _AGGREGATION_WORDS: frozenset[str] = frozenset({
     "all", "list", "every", "everything", "various", "different",
     "many", "several", "multiple", "summarize", "overview",
@@ -80,22 +104,30 @@ class QueryStrategyClassifier:
         # Strip punctuation from words so "january?" matches "january"
         words = set(re.sub(r"[^\w\s'-]", "", q).split())
-        # Check multi_hop BEFORE temporal — phrases like "connection between"
-        # must not be short-circuited by the word "between" in _TEMPORAL_WORDS.
+        # Check multi_hop phrases FIRST (substring match against q, not whole-word)
         if any(p in q for p in _MULTI_HOP_PHRASES):
             return "multi_hop"
+        # Extract proper nouns EARLY for the multi-entity heuristic
+        _SENTENCE_STARTERS = {"What", "Where", "Who", "Which", "How", "When",
+                              "Does", "Did", "Can", "Could", "Would", "Should",
+                              "Are", "Is", "Was", "Were", "Has", "Have", "The", "Tell"}
+        proper_nouns = [m for m in re.findall(r"\b[A-Z][a-z]{1,}\b", query)
+                        if m not in _SENTENCE_STARTERS]
+        # V3.3.19: 2+ entities + causal/temporal word → multi_hop
+        # This MUST fire BEFORE the temporal check, otherwise "What did
+        # Alice study before moving to New York?" would classify as
+        # "temporal" instead of "multi_hop".
+        if len(proper_nouns) >= 2 and words & _CAUSAL_TEMPORAL_WORDS:
+            return "multi_hop"
         if words & _TEMPORAL_WORDS:
             return "temporal"
         if words & _AGGREGATION_WORDS:
             return "aggregation"
         if any(w in q for w in _OPINION_WORDS):
             return "opinion"
-        # Proper nouns — exclude common sentence-initial words
-        _SENTENCE_STARTERS = {"What", "Where", "Who", "Which", "How", "When",
-                              "Does", "Did", "Can", "Could", "Would", "Should",
-                              "Are", "Is", "Was", "Were", "Has", "Have", "The", "Tell"}
-        proper_nouns = [m for m in re.findall(r"\b[A-Z][a-z]{1,}\b", query)
-                        if m not in _SENTENCE_STARTERS]
         if len(proper_nouns) >= 2:
             return "entity"
         if q.startswith(("what ", "where ", "who ", "which ", "how ")):