npm - superlocalmemory - Versions diffs - 3.3.20 → 3.3.21 - Mend

superlocalmemory 3.3.20 → 3.3.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (78)

package/package.json +1 -1
package/pyproject.toml +9 -1
package/src/superlocalmemory/cli/commands.py +138 -22
package/src/superlocalmemory/cli/daemon.py +372 -0
package/src/superlocalmemory/cli/main.py +8 -0
package/src/superlocalmemory/cli/pending_store.py +158 -0
package/src/superlocalmemory/cli/setup_wizard.py +39 -6
package/src/superlocalmemory/code_graph/__init__.py +46 -0
package/src/superlocalmemory/code_graph/blast_radius.py +177 -0
package/src/superlocalmemory/code_graph/bridge/__init__.py +36 -0
package/src/superlocalmemory/code_graph/bridge/entity_resolver.py +464 -0
package/src/superlocalmemory/code_graph/bridge/event_listeners.py +195 -0
package/src/superlocalmemory/code_graph/bridge/fact_enricher.py +159 -0
package/src/superlocalmemory/code_graph/bridge/hebbian_linker.py +170 -0
package/src/superlocalmemory/code_graph/bridge/temporal_checker.py +152 -0
package/src/superlocalmemory/code_graph/changes.py +363 -0
package/src/superlocalmemory/code_graph/communities.py +299 -0
package/src/superlocalmemory/code_graph/config.py +88 -0
package/src/superlocalmemory/code_graph/database.py +482 -0
package/src/superlocalmemory/code_graph/extractors/__init__.py +78 -0
package/src/superlocalmemory/code_graph/extractors/python.py +413 -0
package/src/superlocalmemory/code_graph/extractors/typescript.py +556 -0
package/src/superlocalmemory/code_graph/flows.py +350 -0
package/src/superlocalmemory/code_graph/git_hooks.py +226 -0
package/src/superlocalmemory/code_graph/graph_engine.py +295 -0
package/src/superlocalmemory/code_graph/graph_store.py +158 -0
package/src/superlocalmemory/code_graph/incremental.py +200 -0
package/src/superlocalmemory/code_graph/models.py +130 -0
package/src/superlocalmemory/code_graph/parser.py +507 -0
package/src/superlocalmemory/code_graph/resolver.py +321 -0
package/src/superlocalmemory/code_graph/search.py +460 -0
package/src/superlocalmemory/code_graph/service.py +95 -0
package/src/superlocalmemory/code_graph/watcher.py +207 -0
package/src/superlocalmemory/core/embedding_worker.py +4 -2
package/src/superlocalmemory/core/embeddings.py +8 -2
package/src/superlocalmemory/core/engine.py +32 -0
package/src/superlocalmemory/core/engine_wiring.py +5 -0
package/src/superlocalmemory/core/store_pipeline.py +23 -1
package/src/superlocalmemory/encoding/fact_extractor.py +68 -7
package/src/superlocalmemory/infra/event_bus.py +5 -0
package/src/superlocalmemory/mcp/server.py +23 -0
package/src/superlocalmemory/mcp/tools_code_graph.py +1592 -0
package/src/superlocalmemory/retrieval/engine.py +137 -2
package/src/superlocalmemory/retrieval/semantic_channel.py +6 -2
package/src/superlocalmemory/retrieval/spreading_activation.py +5 -3
package/src/superlocalmemory/retrieval/strategy.py +16 -0
package/src/superlocalmemory/server/api.py +4 -2
package/src/superlocalmemory/server/ui.py +5 -2
package/src/superlocalmemory/storage/schema_code_graph.py +239 -0
package/src/superlocalmemory/ui/index.html +1879 -0
package/src/superlocalmemory/ui/js/agents.js +192 -0
package/src/superlocalmemory/ui/js/auto-settings.js +399 -0
package/src/superlocalmemory/ui/js/behavioral.js +276 -0
package/src/superlocalmemory/ui/js/clusters.js +206 -0
package/src/superlocalmemory/ui/js/compliance.js +252 -0
package/src/superlocalmemory/ui/js/core.js +246 -0
package/src/superlocalmemory/ui/js/dashboard.js +110 -0
package/src/superlocalmemory/ui/js/events.js +178 -0
package/src/superlocalmemory/ui/js/fact-detail.js +92 -0
package/src/superlocalmemory/ui/js/feedback.js +333 -0
package/src/superlocalmemory/ui/js/graph-core.js +447 -0
package/src/superlocalmemory/ui/js/graph-filters.js +220 -0
package/src/superlocalmemory/ui/js/graph-interactions.js +351 -0
package/src/superlocalmemory/ui/js/graph-ui.js +214 -0
package/src/superlocalmemory/ui/js/ide-status.js +102 -0
package/src/superlocalmemory/ui/js/init.js +45 -0
package/src/superlocalmemory/ui/js/learning.js +435 -0
package/src/superlocalmemory/ui/js/lifecycle.js +298 -0
package/src/superlocalmemory/ui/js/math-health.js +98 -0
package/src/superlocalmemory/ui/js/memories.js +264 -0
package/src/superlocalmemory/ui/js/modal.js +357 -0
package/src/superlocalmemory/ui/js/patterns.js +93 -0
package/src/superlocalmemory/ui/js/profiles.js +236 -0
package/src/superlocalmemory/ui/js/recall-lab.js +292 -0
package/src/superlocalmemory/ui/js/search.js +59 -0
package/src/superlocalmemory/ui/js/settings.js +224 -0
package/src/superlocalmemory/ui/js/timeline.js +32 -0
package/src/superlocalmemory/ui/js/trust-dashboard.js +73 -0

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "superlocalmemory",
-  "version": "3.3.20",
+  "version": "3.3.21",
   "description": "Information-geometric agent memory with mathematical guarantees. 4-channel retrieval, Fisher-Rao similarity, zero-LLM mode, EU AI Act compliant. Works with Claude, Cursor, Windsurf, and 17+ AI tools.",
   "keywords": [
     "ai-memory",

package/pyproject.toml CHANGED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "superlocalmemory"
-version = "3.3.20"
+version = "3.3.21"
 description = "Information-geometric agent memory with mathematical guarantees"
 readme = "README.md"
 license = {text = "MIT"}
@@ -44,6 +44,11 @@ dependencies = [
     "lightgbm>=4.0.0",
     "diskcache>=5.6.0",
     "orjson>=3.9.0",
+    # CodeGraph — code knowledge graph (v3.4)
+    "tree-sitter>=0.23.0,<1",
+    "tree-sitter-language-pack>=0.3,<2",
+    "rustworkx>=0.15,<1",
+    "watchdog>=4.0,<6",
 ]
 [project.optional-dependencies]
@@ -92,6 +97,9 @@ build-backend = "setuptools.build_meta"
 [tool.setuptools.packages.find]
 where = ["src"]
+[tool.setuptools.package-data]
+superlocalmemory = ["ui/**/*"]
 [tool.pytest.ini_options]
 testpaths = ["tests"]
 pythonpath = ["src"]

package/src/superlocalmemory/cli/commands.py CHANGED Viewed

@@ -56,6 +56,8 @@ def dispatch(args: Namespace) -> None:
         "consolidate": cmd_consolidate,
         "soft-prompts": cmd_soft_prompts,
         "reap": cmd_reap,
+        # V3.3.21 daemon
+        "serve": cmd_serve,
     }
     handler = handlers.get(args.command)
     if handler:
@@ -65,6 +67,49 @@ def dispatch(args: Namespace) -> None:
         sys.exit(1)
+# -- Daemon serve mode (V3.3.21) ------------------------------------------
+def cmd_serve(args: Namespace) -> None:
+    """Start, stop, or report the status of the SLM daemon.
+    Reads ``args.action`` (defaulting to ``'start'`` when the attribute is
+    missing). ``'stop'`` calls ``stop_daemon()``, ``'status'`` prints the
+    daemon's ``/status`` payload, and anything else starts the daemon via
+    ``ensure_daemon()`` unless it is already running. All results are
+    reported with ``print``; nothing is returned.
+    """
+    # Imported inside the function rather than at module import time.
+    from superlocalmemory.cli.daemon import is_daemon_running, ensure_daemon, stop_daemon
+    action = getattr(args, 'action', 'start')
+    if action == 'stop':
+        # NOTE(review): stop_daemon() also returns True when no PID file
+        # exists, so "Daemon stopped." can appear even if nothing was running;
+        # the else branch is only reached when stop_daemon() returns False.
+        if stop_daemon():
+            print("Daemon stopped.")
+        else:
+            print("Daemon was not running.")
+        return
+    if action == 'status':
+        if is_daemon_running():
+            from superlocalmemory.cli.daemon import daemon_request
+            # daemon_request() returns None on any failure, hence the
+            # "could not get status" fallback below.
+            status = daemon_request("GET", "/status")
+            if status:
+                print(f"Daemon: RUNNING (PID {status['pid']}, "
+                      f"mode={status['mode']}, facts={status['fact_count']}, "
+                      f"uptime={status['uptime_s']}s, idle={status['idle_s']}s)")
+            else:
+                print("Daemon: RUNNING (could not get status)")
+        else:
+            print("Daemon: NOT RUNNING")
+        return
+    # Default: start
+    if is_daemon_running():
+        print("Daemon already running.")
+        return
+    print("Starting SLM daemon (engine warming up)...")
+    # ensure_daemon() spawns the daemon and polls until it is healthy or
+    # its wait loop gives up.
+    if ensure_daemon():
+        print("Daemon started \u2713 — CLI commands are now instant.")
+        print("  slm serve status  — check daemon status")
+        print("  slm serve stop    — stop daemon and free RAM")
+    else:
+        print("Failed to start daemon. Check ~/.superlocalmemory/logs/daemon.log")
 # -- Setup & Config (no --json — interactive commands) ---------------------
@@ -236,15 +281,26 @@ def cmd_list(args: Namespace) -> None:
     if not facts:
         print("No memories stored yet.")
-        return
+    else:
+        print(f"Recent memories ({len(facts)}):\n")
+        for i, f in enumerate(facts, 1):
+            date = (f.created_at or "")[:19]
+            ftype_raw = getattr(f, "fact_type", "")
+            ftype = ftype_raw.value if hasattr(ftype_raw, "value") else str(ftype_raw)
+            content = f.content[:100] + ("..." if len(f.content) > 100 else "")
+            print(f"  {i:3d}. [{date}] ({ftype}) {content}")
-    print(f"Recent memories ({len(facts)}):\n")
-    for i, f in enumerate(facts, 1):
-        date = (f.created_at or "")[:19]
-        ftype_raw = getattr(f, "fact_type", "")
-        ftype = ftype_raw.value if hasattr(ftype_raw, "value") else str(ftype_raw)
-        content = f.content[:100] + ("..." if len(f.content) > 100 else "")
-        print(f"  {i:3d}. [{date}] ({ftype}) {content}")
+    # V3.3.21: Show pending memories (store-first pattern)
+    try:
+        from superlocalmemory.cli.pending_store import get_pending
+        pending = get_pending(limit=10)
+        if pending:
+            print(f"\nPending (processing in background): {len(pending)}")
+            for p in pending:
+                content = p["content"][:80] + ("..." if len(p["content"]) > 80 else "")
+                print(f"  \u23f3 [{p['created_at'][:19]}] {content}")
+    except Exception:
+        pass
 def cmd_remember(args: Namespace) -> None:
@@ -254,25 +310,56 @@ def cmd_remember(args: Namespace) -> None:
     use_json = getattr(args, 'json', False)
     sync_mode = getattr(args, 'sync_mode', False)
-    # V3.3.19: Async by default — return instantly, process in background.
-    # Use --sync to wait for completion (e.g., when you need fact_ids back).
+    # V3.3.21: Route through daemon for instant remember (no cold start).
+    # If daemon is running, send request directly (~0.1s).
+    # If not, use store-first pattern (pending.db) as fallback.
     if not sync_mode:
+        # Try daemon first
+        try:
+            from superlocalmemory.cli.daemon import is_daemon_running, daemon_request, ensure_daemon
+            if is_daemon_running() or ensure_daemon():
+                result = daemon_request("POST", "/remember", {
+                    "content": args.content,
+                    "tags": args.tags or "",
+                })
+                if result and "fact_ids" in result:
+                    if use_json:
+                        from superlocalmemory.cli.json_output import json_print
+                        json_print("remember", data=result)
+                    else:
+                        print(f"Stored \u2713 {result['count']} facts (via daemon).")
+                    return
+        except Exception:
+            pass  # Fall through to pending store
+        # Fallback: store-first pattern (Option C — zero data loss)
         import subprocess
-        cmd = [sys.executable, "-m", "superlocalmemory.cli.main", "remember", args.content]
+        from superlocalmemory.cli.pending_store import store_pending
+        row_id = store_pending(
+            content=args.content,
+            tags=args.tags or "",
+        )
+        cmd = [sys.executable, "-m", "superlocalmemory.cli.main",
+               "remember", args.content, "--sync"]
         if args.tags:
             cmd.extend(["--tags", args.tags])
-        if use_json:
-            cmd.append("--json")
-        # Spawn detached subprocess — parent exits immediately
-        subprocess.Popen(
-            cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
-            start_new_session=True,
-        )
+        log_dir = __import__("pathlib").Path.home() / ".superlocalmemory" / "logs"
+        log_dir.mkdir(parents=True, exist_ok=True)
+        log_file = log_dir / "async-remember.log"
+        with open(log_file, "a") as lf:
+            subprocess.Popen(
+                cmd, stdout=subprocess.DEVNULL, stderr=lf,
+                start_new_session=True,
+            )
         if use_json:
             from superlocalmemory.cli.json_output import json_print
-            json_print("remember", data={"queued": True, "async": True})
+            json_print("remember", data={"queued": True, "async": True,
+                                         "pending_id": row_id, "safe": True})
         else:
-            print("Queued for background processing.")
+            print(f"Stored \u2713 (pending_id={row_id}) \u2014 processing in background.")
         return
     from superlocalmemory.core.engine import MemoryEngine
@@ -304,11 +391,40 @@ def cmd_remember(args: Namespace) -> None:
 def cmd_recall(args: Namespace) -> None:
-    """Search memories via the engine."""
+    """Search memories via the engine — routes through daemon if available."""
+    use_json = getattr(args, 'json', False)
+    # V3.3.21: Route through daemon for instant response (no cold start).
+    # Falls back to direct engine if daemon not running.
+    try:
+        from superlocalmemory.cli.daemon import is_daemon_running, daemon_request, ensure_daemon
+        if is_daemon_running() or ensure_daemon():
+            from urllib.parse import quote
+            result = daemon_request(
+                "GET", f"/recall?q={quote(args.query)}&limit={args.limit}",
+            )
+            if result and "results" in result:
+                # Format daemon response same as engine response
+                if use_json:
+                    from superlocalmemory.cli.json_output import json_print
+                    json_print("recall", data=result, next_actions=[
+                        {"command": "slm list --json", "description": "List recent memories"},
+                    ])
+                    return
+                if not result["results"]:
+                    print("No matching memories found.")
+                    return
+                # Text output
+                print(f"SpreadingActivation.search completed via daemon ({result.get('retrieval_time_ms', 0):.0f}ms)")
+                for i, r in enumerate(result["results"], 1):
+                    print(f"  {i}. [{r['score']:.2f}] {r['content']}")
+                return
+    except Exception:
+        pass  # Fall through to direct engine
     from superlocalmemory.core.config import SLMConfig
     from superlocalmemory.core.engine import MemoryEngine
-    use_json = getattr(args, 'json', False)
     try:
         config = SLMConfig.load()
         engine = MemoryEngine(config)

package/src/superlocalmemory/cli/daemon.py ADDED Viewed

@@ -0,0 +1,372 @@
+# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
+# Licensed under the MIT License - see LICENSE file
+# Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
+"""SLM Daemon — keeps engine warm for instant CLI/MCP response.
+Problem: CLI cold start is 23s (embedding worker spawn + model load).
+Solution: Background daemon keeps MemoryEngine warm. CLI commands route
+requests through the daemon via localhost HTTP (~10ms overhead).
+Architecture:
+  slm serve       → starts daemon (engine init, workers warm, ~600MB RAM)
+  slm remember X  → HTTP POST to daemon → instant (no cold start)
+  slm recall X    → HTTP GET from daemon → instant
+  slm serve stop  → graceful shutdown, workers killed, RAM freed
+Auto-start: if daemon not running on CLI use, starts it automatically.
+Auto-shutdown: after 30 min idle (configurable via SLM_DAEMON_IDLE_TIMEOUT).
+Memory safety:
+  - RSS watchdog on embedding worker (2.5GB cap)
+  - Worker recycling every 5000 requests
+  - Parent watchdog kills workers if daemon dies
+  - SQLite WAL mode for concurrent access
+Part of Qualixar | Author: Varun Pratap Bhardwaj
+License: MIT
+"""
+from __future__ import annotations
+import json
+import logging
+import os
+import signal
+import sys
+import time
+from http.server import HTTPServer, BaseHTTPRequestHandler
+from pathlib import Path
+from threading import Thread
+logger = logging.getLogger(__name__)
+# Port used when no port file exists or its content is not an integer.
+_DEFAULT_PORT = 8767
+# Idle seconds before auto-shutdown; start_server() lets the
+# SLM_DAEMON_IDLE_TIMEOUT environment variable override it.
+_DEFAULT_IDLE_TIMEOUT = 1800  # 30 min
+# Written by start_server(); read by the client helpers to locate the daemon.
+_PID_FILE = Path.home() / ".superlocalmemory" / "daemon.pid"
+_PORT_FILE = Path.home() / ".superlocalmemory" / "daemon.port"
+# ---------------------------------------------------------------------------
+# Client: check if daemon running + send requests
+# ---------------------------------------------------------------------------
+def is_daemon_running() -> bool:
+    """Check if daemon is alive via PID file + HTTP health check."""
+    if not _PID_FILE.exists():
+        return False
+    try:
+        pid = int(_PID_FILE.read_text().strip())
+        os.kill(pid, 0)  # Check if process exists
+    except (ValueError, ProcessLookupError, PermissionError):
+        _PID_FILE.unlink(missing_ok=True)
+        return False
+    # PID exists — verify HTTP health
+    port = _get_port()
+    try:
+        import urllib.request
+        resp = urllib.request.urlopen(
+            f"http://127.0.0.1:{port}/health", timeout=2,
+        )
+        return resp.status == 200
+    except Exception:
+        return False
+def _get_port() -> int:
+    """Return the daemon port recorded in the port file, else the default.
+    Falls back to ``_DEFAULT_PORT`` when the port file is absent or its
+    content does not parse as an integer.
+    """
+    if _PORT_FILE.exists():
+        try:
+            return int(_PORT_FILE.read_text().strip())
+        except ValueError:
+            # Unparsable content: fall through to the default below.
+            pass
+    return _DEFAULT_PORT
+def daemon_request(method: str, path: str, body: dict | None = None) -> dict | None:
+    """Send request to daemon. Returns parsed JSON or None on failure."""
+    port = _get_port()
+    try:
+        import urllib.request
+        url = f"http://127.0.0.1:{port}{path}"
+        data = json.dumps(body).encode() if body else None
+        headers = {"Content-Type": "application/json"} if data else {}
+        req = urllib.request.Request(url, data=data, headers=headers, method=method)
+        resp = urllib.request.urlopen(req, timeout=30)
+        return json.loads(resp.read().decode())
+    except Exception:
+        return None
+def ensure_daemon() -> bool:
+    """Start the daemon if it is not running and wait for it to be ready.
+    Spawns ``python -m superlocalmemory.cli.daemon --start`` in a new
+    session with stdout/stderr appended to
+    ``~/.superlocalmemory/logs/daemon.log``, then polls
+    ``is_daemon_running()``.
+    Returns:
+        True once the daemon passes the health check, False if it is still
+        not ready after the polling loop ends.
+    """
+    if is_daemon_running():
+        return True
+    # Start daemon in background
+    import subprocess
+    cmd = [sys.executable, "-m", "superlocalmemory.cli.daemon", "--start"]
+    log_dir = Path.home() / ".superlocalmemory" / "logs"
+    log_dir.mkdir(parents=True, exist_ok=True)
+    log_file = log_dir / "daemon.log"
+    # The parent's handle is closed when the with-block exits; the child was
+    # given the file as its stdout/stderr at spawn time.
+    with open(log_file, "a") as lf:
+        subprocess.Popen(
+            cmd, stdout=lf, stderr=lf,
+            start_new_session=True,
+        )
+    # Wait for daemon to become ready: 60 polls, 0.5s apart (~30s of
+    # sleeping, plus however long each is_daemon_running() probe takes).
+    for _ in range(60):
+        time.sleep(0.5)
+        if is_daemon_running():
+            return True
+    return False
+def stop_daemon() -> bool:
+    """Send SIGTERM to the daemon recorded in the PID file and clean up.
+    Returns:
+        True when no PID file exists, or after SIGTERM was sent and the PID
+        and port files were removed. False if any step raised, e.g. the PID
+        file is unparsable or the recorded process no longer exists.
+    """
+    if not _PID_FILE.exists():
+        # Nothing recorded, so nothing to stop; reported as success.
+        return True
+    try:
+        pid = int(_PID_FILE.read_text().strip())
+        os.kill(pid, signal.SIGTERM)
+        # Wait for cleanup: up to 20 polls, 0.5s apart, using signal 0 to
+        # probe whether the process still exists.
+        for _ in range(20):
+            time.sleep(0.5)
+            try:
+                os.kill(pid, 0)
+            except ProcessLookupError:
+                break
+        # NOTE(review): the files are removed and True is returned whether or
+        # not the loop above saw the process exit — confirm this is intended.
+        _PID_FILE.unlink(missing_ok=True)
+        _PORT_FILE.unlink(missing_ok=True)
+        return True
+    except Exception:
+        # Includes ProcessLookupError from the initial SIGTERM (stale PID);
+        # in that case the PID and port files are left in place.
+        return False
+# ---------------------------------------------------------------------------
+# Server: HTTP request handler with engine singleton
+# ---------------------------------------------------------------------------
+# Lazily created MemoryEngine shared by all requests (see _get_engine()).
+_engine = None
+# Monotonic timestamp of the most recent request; read by the idle watchdog.
+_last_activity = time.monotonic()
+def _get_engine():
+    """Return the process-wide MemoryEngine, creating it on first use.
+    On first call this loads ``SLMConfig``, builds and initializes the
+    engine, and, when the engine exposes a retrieval engine with a reranker
+    that has ``warmup_sync``, blocks on that warmup (timeout 120). Later
+    calls return the cached instance.
+    """
+    global _engine
+    if _engine is None:
+        # Project imports are deferred until the engine is first needed.
+        from superlocalmemory.core.config import SLMConfig
+        from superlocalmemory.core.engine import MemoryEngine
+        config = SLMConfig.load()
+        _engine = MemoryEngine(config)
+        _engine.initialize()
+        # Force reranker warmup (blocking — daemon can afford to wait)
+        # getattr() with a None default keeps this optional: engines without
+        # these private attributes simply skip the warmup.
+        retrieval_eng = getattr(_engine, '_retrieval_engine', None)
+        if retrieval_eng:
+            reranker = getattr(retrieval_eng, '_reranker', None)
+            if reranker and hasattr(reranker, 'warmup_sync'):
+                reranker.warmup_sync(timeout=120)
+        logger.info("Daemon engine initialized and warm")
+    return _engine
+class DaemonHandler(BaseHTTPRequestHandler):
+    """Lightweight HTTP handler for daemon requests."""
+    def log_message(self, format, *args):
+        """Suppress default access logging."""
+        pass
+    def _send_json(self, status: int, data: dict) -> None:
+        self.send_response(status)
+        self.send_header("Content-Type", "application/json")
+        self.end_headers()
+        self.wfile.write(json.dumps(data).encode())
+    def _read_body(self) -> dict:
+        length = int(self.headers.get("Content-Length", 0))
+        if length == 0:
+            return {}
+        return json.loads(self.rfile.read(length).decode())
+    def do_GET(self) -> None:
+        global _last_activity
+        _last_activity = time.monotonic()
+        if self.path == "/health":
+            self._send_json(200, {"status": "ok", "pid": os.getpid()})
+            return
+        if self.path.startswith("/recall"):
+            try:
+                # Parse query from URL params
+                from urllib.parse import urlparse, parse_qs
+                params = parse_qs(urlparse(self.path).query)
+                query = params.get("q", [""])[0]
+                limit = int(params.get("limit", ["20"])[0])
+                engine = _get_engine()
+                response = engine.recall(query, limit=limit)
+                results = [
+                    {"content": r.fact.content, "score": round(r.score, 4),
+                     "fact_type": getattr(r.fact.fact_type, 'value', str(r.fact.fact_type)),
+                     "fact_id": r.fact.fact_id}
+                    for r in response.results
+                ]
+                self._send_json(200, {
+                    "results": results, "count": len(results),
+                    "query_type": response.query_type,
+                    "retrieval_time_ms": round(response.retrieval_time_ms, 1),
+                })
+            except Exception as exc:
+                self._send_json(500, {"error": str(exc)})
+            return
+        if self.path == "/list":
+            try:
+                engine = _get_engine()
+                facts = engine.list_facts(limit=50)
+                items = [
+                    {"content": f.content[:100], "fact_type": getattr(f.fact_type, 'value', str(f.fact_type)),
+                     "created_at": (f.created_at or "")[:19], "fact_id": f.fact_id}
+                    for f in facts
+                ]
+                self._send_json(200, {"results": items, "count": len(items)})
+            except Exception as exc:
+                self._send_json(500, {"error": str(exc)})
+            return
+        if self.path == "/status":
+            engine = _get_engine()
+            uptime = time.monotonic() - _server_start_time
+            self._send_json(200, {
+                "status": "running", "pid": os.getpid(),
+                "uptime_s": round(uptime),
+                "mode": engine._config.mode.value,
+                "fact_count": engine.fact_count,
+                "idle_s": round(time.monotonic() - _last_activity),
+            })
+            return
+        self._send_json(404, {"error": "not found"})
+    def do_POST(self) -> None:
+        global _last_activity
+        _last_activity = time.monotonic()
+        if self.path == "/remember":
+            try:
+                body = self._read_body()
+                content = body.get("content", "")
+                tags = body.get("tags", "")
+                if not content:
+                    self._send_json(400, {"error": "content required"})
+                    return
+                engine = _get_engine()
+                metadata = {"tags": tags} if tags else {}
+                fact_ids = engine.store(content, metadata=metadata)
+                self._send_json(200, {"fact_ids": fact_ids, "count": len(fact_ids)})
+            except Exception as exc:
+                self._send_json(500, {"error": str(exc)})
+            return
+        if self.path == "/stop":
+            self._send_json(200, {"status": "stopping"})
+            Thread(target=_shutdown_server, daemon=True).start()
+            return
+        self._send_json(404, {"error": "not found"})
+# ---------------------------------------------------------------------------
+# Server lifecycle
+# ---------------------------------------------------------------------------
+# The running HTTPServer instance; assigned by start_server().
+_server: HTTPServer | None = None
+# Monotonic start time, reset by start_server(); /status derives uptime from it.
+_server_start_time = time.monotonic()
+def _shutdown_server() -> None:
+    """Close the engine, stop the HTTP server, and remove the PID/port files.
+    Each step is skipped when its global is ``None``, and the file removals
+    ignore missing files.
+    """
+    global _engine, _server
+    # Short pause before tearing down; presumably lets an in-flight /stop
+    # response finish being written — TODO confirm.
+    time.sleep(0.5)
+    if _engine is not None:
+        try:
+            _engine.close()
+        except Exception:
+            # Best-effort: a failing close must not block the shutdown.
+            pass
+        _engine = None
+    if _server is not None:
+        # NOTE(review): socketserver documents that shutdown() blocks until
+        # serve_forever() returns and must be called from a different thread
+        # than the one running serve_forever() — verify every call site.
+        _server.shutdown()
+    _PID_FILE.unlink(missing_ok=True)
+    _PORT_FILE.unlink(missing_ok=True)
+def _idle_watchdog(timeout: int) -> None:
+    """Auto-shutdown after idle timeout.
+    Loops forever, waking every 30 seconds; once the time since
+    ``_last_activity`` exceeds ``timeout`` seconds it runs
+    ``_shutdown_server()`` and terminates the process with ``os._exit(0)``.
+    """
+    global _last_activity
+    while True:
+        # Fixed poll interval, so shutdown may lag the timeout by up to 30s.
+        time.sleep(30)
+        idle = time.monotonic() - _last_activity
+        if idle > timeout:
+            logger.info("Daemon idle for %ds, shutting down", int(idle))
+            _shutdown_server()
+            # Hard exit from this worker thread, skipping interpreter cleanup.
+            os._exit(0)
+def start_server(port: int = _DEFAULT_PORT, idle_timeout: int | None = None) -> None:
+    """Start the daemon HTTP server. Blocks until stopped."""
+    global _server, _server_start_time, _last_activity
+    idle_timeout = idle_timeout or int(os.environ.get(
+        "SLM_DAEMON_IDLE_TIMEOUT", str(_DEFAULT_IDLE_TIMEOUT),
+    ))
+    # Write PID + port files
+    _PID_FILE.parent.mkdir(parents=True, exist_ok=True)
+    _PID_FILE.write_text(str(os.getpid()))
+    _PORT_FILE.write_text(str(port))
+    # Handle SIGTERM for graceful shutdown
+    signal.signal(signal.SIGTERM, lambda *_: _shutdown_server() or os._exit(0))
+    # Pre-warm engine (this is the cold start — daemon absorbs it once)
+    logger.info("Daemon starting — warming engine...")
+    _get_engine()
+    logger.info("Engine warm. Daemon ready on port %d (idle timeout: %ds)", port, idle_timeout)
+    _server_start_time = time.monotonic()
+    _last_activity = time.monotonic()
+    # Start idle watchdog
+    Thread(target=_idle_watchdog, args=(idle_timeout,), daemon=True, name="idle-watchdog").start()
+    # Start HTTP server
+    # SO_REUSEADDR must be set on the class BEFORE __init__ calls bind()
+    HTTPServer.allow_reuse_address = True
+    _server = HTTPServer(("127.0.0.1", port), DaemonHandler)
+    try:
+        _server.serve_forever()
+    except KeyboardInterrupt:
+        pass
+    finally:
+        _shutdown_server()
+# ---------------------------------------------------------------------------
+# CLI entry point
+# ---------------------------------------------------------------------------
+# Script entry point: ``--start`` runs the server in the foreground of this
+# process (ensure_daemon() spawns it this way), ``--stop`` signals a running
+# daemon, and anything else prints usage.
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
+    if "--start" in sys.argv:
+        start_server()
+    elif "--stop" in sys.argv:
+        stop_daemon()
+    else:
+        print("Usage: python -m superlocalmemory.cli.daemon --start|--stop")

package/src/superlocalmemory/cli/main.py CHANGED Viewed

@@ -191,6 +191,14 @@ def main() -> None:
         "--port", type=int, default=8765, help="Port (default 8765)",
     )
+    # V3.3.21: Daemon serve mode
+    serve_p = sub.add_parser("serve", help="Start/stop daemon for instant CLI response (~600MB RAM)")
+    serve_p.add_argument(
+        "action", nargs="?", default="start",
+        choices=["start", "stop", "status"],
+        help="start (default), stop, or status",
+    )
     # -- Profiles ------------------------------------------------------
     profile_p = sub.add_parser("profile", help="Profile management (list/switch/create)")
     profile_p.add_argument(