npm - superlocalmemory - Versions diffs - 3.4.22 → 3.4.24 - Mend

superlocalmemory 3.4.22 → 3.4.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29)

package/CHANGELOG.md +29 -0
package/package.json +1 -1
package/pyproject.toml +1 -1
package/skills/slm-build-graph/SKILL.md +1 -1
package/skills/slm-list-recent/SKILL.md +1 -1
package/skills/slm-recall/SKILL.md +1 -1
package/skills/slm-remember/SKILL.md +1 -1
package/skills/slm-status/SKILL.md +1 -1
package/skills/slm-switch-profile/SKILL.md +1 -1
package/src/superlocalmemory/__init__.py +3 -0
package/src/superlocalmemory/core/config.py +66 -18
package/src/superlocalmemory/core/context_cache.py +1 -1
package/src/superlocalmemory/core/embedding_worker.py +8 -27
package/src/superlocalmemory/core/embeddings.py +83 -1
package/src/superlocalmemory/core/engine_wiring.py +8 -0
package/src/superlocalmemory/core/platform_utils.py +127 -0
package/src/superlocalmemory/core/recall_worker.py +8 -24
package/src/superlocalmemory/core/reranker_worker.py +8 -24
package/src/superlocalmemory/core/worker_pool.py +2 -1
package/src/superlocalmemory/hooks/context_payload.py +1 -1
package/src/superlocalmemory/learning/database.py +1 -1
package/src/superlocalmemory/retrieval/reranker.py +2 -1
package/src/superlocalmemory/server/routes/brain.py +1 -1
package/src/superlocalmemory/server/routes/v3_api.py +150 -8
package/src/superlocalmemory/server/security_middleware.py +20 -2
package/src/superlocalmemory/server/unified_daemon.py +107 -5
package/src/superlocalmemory/ui/index.html +50 -1
package/src/superlocalmemory/ui/js/auto-settings.js +131 -5
package/src/superlocalmemory/ui/js/core.js +96 -1

package/src/superlocalmemory/core/recall_worker.py CHANGED Viewed

@@ -20,7 +20,6 @@ import json
 import os
 import signal
 import sys
-import threading
 # Force CPU BEFORE any torch import
 os.environ["CUDA_VISIBLE_DEVICES"] = ""
@@ -39,24 +38,10 @@ if sys.platform != "win32":
 def _start_parent_watchdog() -> None:
     """Monitor parent process — self-terminate if parent dies.
-    Prevents orphaned workers that consume 500+ MB each when the parent
-    process crashes, is killed, or exits without cleanup.
-    V3.3.7: Added after incident where orphaned workers consumed 33 GB.
+    V3.4.24: Delegates to platform_utils.start_parent_watchdog().
     """
-    parent_pid = os.getppid()
-    def _watch() -> None:
-        import time
-        while True:
-            time.sleep(5)
-            try:
-                os.kill(parent_pid, 0)
-            except OSError:
-                os._exit(0)
-    t = threading.Thread(target=_watch, daemon=True, name="parent-watchdog")
-    t.start()
+    from superlocalmemory.core.platform_utils import start_parent_watchdog
+    start_parent_watchdog()
 _engine = None
@@ -253,7 +238,8 @@ def _handle_status() -> dict:
 def _worker_main() -> None:
     """Main loop: read JSON requests from stdin, write responses to stdout."""
-    _start_parent_watchdog()  # V3.3.7: self-terminate if parent dies
+    _start_parent_watchdog()
+    from superlocalmemory.core.platform_utils import get_rss_mb
     for line in sys.stdin:
         line = line.strip()
@@ -326,11 +312,9 @@ def _worker_main() -> None:
         except Exception as exc:
             _respond({"ok": False, "error": str(exc)})
-        # V3.3.16: RSS watchdog — self-terminate if memory exceeds 1.5GB.
-        # Parent auto-respawns a fresh worker on next request.
-        import resource
-        rss_mb = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024 / 1024
-        if rss_mb > 2500:
+        # V3.3.16: RSS watchdog — V3.4.24: cross-platform via platform_utils.
+        rss_mb = get_rss_mb()
+        if rss_mb > 0 and rss_mb > 2500:
             sys.exit(0)

package/src/superlocalmemory/core/reranker_worker.py CHANGED Viewed

@@ -31,7 +31,6 @@ import platform
 import signal
 import struct
 import sys
-import threading
 # Force CPU BEFORE any torch import
 os.environ["CUDA_VISIBLE_DEVICES"] = ""
@@ -52,25 +51,10 @@ if sys.platform != "win32":
 def _start_parent_watchdog() -> None:
     """Monitor parent process — self-terminate if parent dies.
-    Prevents orphaned workers that consume 1+ GB each when the parent
-    process crashes, is killed, or exits without cleanup.
-    V3.3.7: Added after incident where ~30 orphaned workers consumed 33 GB.
+    V3.4.24: Delegates to platform_utils.start_parent_watchdog().
     """
-    parent_pid = os.getppid()
-    def _watch() -> None:
-        import time
-        while True:
-            time.sleep(5)
-            try:
-                os.kill(parent_pid, 0)  # Check if parent is alive (signal 0)
-            except OSError:
-                # Parent is dead — self-terminate
-                os._exit(0)
-    t = threading.Thread(target=_watch, daemon=True, name="parent-watchdog")
-    t.start()
+    from superlocalmemory.core.platform_utils import start_parent_watchdog
+    start_parent_watchdog()
 def _detect_onnx_variant(model_name: str = "") -> str:
@@ -101,7 +85,8 @@ def _detect_onnx_variant(model_name: str = "") -> str:
 def _worker_main() -> None:
     """Main loop: read JSON requests from stdin, write responses to stdout."""
-    _start_parent_watchdog()  # V3.3.7: self-terminate if parent dies
+    _start_parent_watchdog()
+    from superlocalmemory.core.platform_utils import get_rss_mb
     model = None
     active_backend = ""
@@ -194,10 +179,9 @@ def _worker_main() -> None:
             except Exception as exc:
                 _respond({"ok": False, "error": str(exc)})
-            # V3.3.16: RSS watchdog — same as embedding_worker
-            import resource
-            rss_mb = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024 / 1024
-            if rss_mb > 2500:
+            # V3.3.16: RSS watchdog — V3.4.24: cross-platform via platform_utils.
+            rss_mb = get_rss_mb()
+            if rss_mb > 0 and rss_mb > 2500:
                 sys.exit(0)
             continue

package/src/superlocalmemory/core/worker_pool.py CHANGED Viewed

@@ -247,6 +247,7 @@ class WorkerPool:
                 "TOKENIZERS_PARALLELISM": "false",
                 "TORCH_DEVICE": "cpu",
             }
+            from superlocalmemory.core.platform_utils import popen_platform_kwargs
             self._proc = subprocess.Popen(
                 [sys.executable, "-m", "superlocalmemory.core.recall_worker"],
                 stdin=subprocess.PIPE,
@@ -255,7 +256,7 @@ class WorkerPool:
                 text=True,
                 bufsize=1,
                 env=env,
-                start_new_session=True,  # Prevent terminal signals bleeding to worker
+                **popen_platform_kwargs(),
             )
             logger.info("Recall worker spawned (PID %d)", self._proc.pid)
         except Exception as exc:

package/src/superlocalmemory/hooks/context_payload.py CHANGED Viewed

@@ -22,7 +22,7 @@ from typing import Callable, Iterable
 from superlocalmemory.core.security_primitives import redact_secrets
-VERSION = "3.4.22"
+VERSION = "3.4.23"
 DEFAULT_TOP_K = 10
 DEFAULT_DECISIONS_K = 5
 DEFAULT_MEMORIES_K = 10

package/src/superlocalmemory/learning/database.py CHANGED Viewed

@@ -395,7 +395,7 @@ class LearningDatabase:
         feature_names: list[str],
         trained_on_count: int,
         metrics: dict,
-        model_version: str = "3.4.22",
+        model_version: str = "3.4.23",
     ) -> int:
         """Persist a newly trained model and flip the active flag.

package/src/superlocalmemory/retrieval/reranker.py CHANGED Viewed

@@ -193,6 +193,7 @@ class CrossEncoderReranker:
                 "TOKENIZERS_PARALLELISM": "false",
                 "TORCH_DEVICE": "cpu",
             }
+            from superlocalmemory.core.platform_utils import popen_platform_kwargs
             self._worker_proc = subprocess.Popen(
                 [sys.executable, "-m", worker_module],
                 stdin=subprocess.PIPE,
@@ -201,7 +202,7 @@ class CrossEncoderReranker:
                 text=True,
                 bufsize=1,
                 env=env,
-                start_new_session=True,
+                **popen_platform_kwargs(),
             )
             # v3.4.13: Register PID for machine-wide singleton
             _RERANKER_PID_FILE.parent.mkdir(parents=True, exist_ok=True)

package/src/superlocalmemory/server/routes/brain.py CHANGED Viewed

@@ -64,7 +64,7 @@ router = APIRouter(prefix="/api/v3", tags=["brain"])
 # LLD-03 v2 stratum space = 4 query types × 3 entity bins × 4 time buckets.
 _STRATA_TOTAL: int = 48
-_VERSION: str = "3.4.22"
+_VERSION: str = "3.4.23"
 # Banned metric names (LLD-04 U4). Kept as a tuple for grep visibility;
 # the source-level test asserts we don't accidentally reintroduce them.

package/src/superlocalmemory/server/routes/v3_api.py CHANGED Viewed

@@ -129,6 +129,11 @@ async def set_mode(request: Request):
             llm_model=old_config.llm.model,
             llm_api_key=old_config.llm.api_key,
             llm_api_base=old_config.llm.api_base,
+            embedding_provider=old_config.embedding.provider,
+            embedding_endpoint=old_config.embedding.api_endpoint,
+            embedding_key=old_config.embedding.api_key,
+            embedding_model_name=old_config.embedding.model_name,
+            embedding_dimension=old_config.embedding.dimension,
         )
         new_config.active_profile = old_config.active_profile
         new_config.save()
@@ -165,7 +170,10 @@ async def set_mode(request: Request):
 @router.post("/mode/set")
 async def set_full_config(request: Request):
-    """Save mode + provider + model + API key together."""
+    """Save mode + provider + model + API key together.
+    V3.4.24: Also accepts embedding_* fields for custom embedding endpoints.
+    """
     try:
         body = await request.json()
         new_mode = body.get("mode", "a").lower()
@@ -187,6 +195,11 @@ async def set_full_config(request: Request):
             llm_model=model,
             llm_api_key=api_key,
             llm_api_base="http://localhost:11434" if provider == "ollama" else "",
+            embedding_provider=body.get("embedding_provider", ""),
+            embedding_endpoint=body.get("embedding_endpoint", ""),
+            embedding_key=body.get("embedding_key", ""),
+            embedding_model_name=body.get("embedding_model", ""),
+            embedding_dimension=int(body.get("embedding_dimension", 0) or 0),
         )
         config.active_profile = old.active_profile
         config.save()
@@ -213,11 +226,145 @@ async def set_full_config(request: Request):
             "mode": new_mode,
             "provider": provider,
             "model": model,
+            "embedding_provider": config.embedding.provider,
+            "embedding_model": config.embedding.model_name,
+            "embedding_dimension": config.embedding.dimension,
+        }
+    except Exception as e:
+        return JSONResponse({"error": str(e)}, status_code=500)
+# ── V3.4.24: Embedding Configuration ────────────────────────────────
+@router.get("/embedding/config")
+async def get_embedding_config(request: Request):
+    """Return current embedding configuration."""
+    try:
+        from superlocalmemory.core.config import SLMConfig
+        config = SLMConfig.load()
+        emb = config.embedding
+        return {
+            "provider": emb.provider,
+            "model_name": emb.model_name,
+            "dimension": emb.dimension,
+            "api_endpoint": emb.api_endpoint,
+            "has_key": bool(emb.api_key),
+            "is_openai_compatible": emb.is_openai_compatible,
+            "mode": config.mode.value,
+        }
+    except Exception as e:
+        return JSONResponse({"error": str(e)}, status_code=500)
+@router.put("/embedding/config")
+async def set_embedding_config(request: Request):
+    """Update embedding configuration independently of mode switch."""
+    try:
+        body = await request.json()
+        from superlocalmemory.core.config import SLMConfig, EmbeddingConfig
+        config = SLMConfig.load()
+        new_provider = body.get("provider", config.embedding.provider)
+        new_model = body.get("model_name", config.embedding.model_name)
+        new_dim = int(body.get("dimension", config.embedding.dimension) or 768)
+        if not (64 <= new_dim <= 8192):
+            return JSONResponse({"error": f"Dimension must be 64-8192, got {new_dim}"}, status_code=400)
+        new_endpoint = body.get("api_endpoint", config.embedding.api_endpoint)
+        new_key = body.get("api_key", config.embedding.api_key)
+        old_emb = config.embedding
+        config.embedding = EmbeddingConfig(
+            model_name=new_model,
+            dimension=new_dim,
+            provider=new_provider,
+            api_endpoint=new_endpoint,
+            api_key=new_key,
+            ollama_model=old_emb.ollama_model,
+            ollama_base_url=old_emb.ollama_base_url,
+            api_version=old_emb.api_version,
+            deployment_name=old_emb.deployment_name,
+        )
+        config.save()
+        needs_reindex = (
+            old_emb.provider != new_provider
+            or old_emb.model_name != new_model
+            or old_emb.dimension != new_dim
+        )
+        # Kill workers so next request uses new config
+        try:
+            from superlocalmemory.core.worker_pool import WorkerPool
+            WorkerPool.shared().shutdown()
+        except Exception:
+            pass
+        if hasattr(request.app.state, "engine"):
+            request.app.state.engine = None
+        return {
+            "success": True,
+            "provider": new_provider,
+            "model_name": new_model,
+            "dimension": new_dim,
+            "needs_reindex": needs_reindex,
         }
     except Exception as e:
         return JSONResponse({"error": str(e)}, status_code=500)
+@router.post("/embedding/test")
+async def test_embedding_endpoint(request: Request):
+    """Test connectivity to a custom embedding endpoint."""
+    try:
+        import httpx
+        from urllib.parse import urlparse
+        body = await request.json()
+        endpoint = body.get("api_endpoint", "").rstrip("/")
+        model = body.get("model_name", "test")
+        api_key = body.get("api_key", "")
+        if not endpoint:
+            return JSONResponse({"error": "No endpoint provided"}, status_code=400)
+        parsed = urlparse(endpoint)
+        if parsed.scheme not in ("http", "https"):
+            return JSONResponse({"error": "Only http/https endpoints supported"}, status_code=400)
+        host = parsed.hostname or ""
+        if host in ("169.254.169.254", "metadata.google.internal"):
+            return JSONResponse({"error": "Cloud metadata endpoints not allowed"}, status_code=400)
+        if not endpoint.endswith("/embeddings"):
+            endpoint = f"{endpoint}/embeddings"
+        headers = {"Content-Type": "application/json"}
+        if api_key:
+            headers["Authorization"] = f"Bearer {api_key}"
+        payload = {"input": ["test embedding connection"], "model": model}
+        with httpx.Client(timeout=httpx.Timeout(15.0)) as client:
+            resp = client.post(endpoint, headers=headers, json=payload)
+            resp.raise_for_status()
+            data = resp.json()
+            emb_data = data.get("data", [])
+            if emb_data:
+                dim = len(emb_data[0].get("embedding", []))
+                return {
+                    "success": True,
+                    "message": f"Connected! Dimension: {dim}",
+                    "dimension": dim,
+                }
+            return {"success": False, "error": "No embedding data returned"}
+    except httpx.HTTPStatusError as e:
+        return {"success": False, "error": f"HTTP {e.response.status_code}"}
+    except httpx.ConnectError:
+        return {"success": False, "error": "Cannot reach the embedding server. Is it running?"}
+    except httpx.TimeoutException:
+        return {"success": False, "error": "Connection timed out after 15 seconds."}
+    except Exception as e:
+        return {"success": False, "error": type(e).__name__}
 @router.post("/provider/test")
 async def test_provider(request: Request):
     """Test connectivity to an LLM provider."""
@@ -1593,13 +1740,8 @@ async def process_health(request: Request):
         processes["worker_pool"] = {"status": worker_status}
         # Memory usage of current process (approximate)
-        memory_mb = 0.0
-        try:
-            import resource
-            usage = resource.getrusage(resource.RUSAGE_SELF)
-            memory_mb = round(usage.ru_maxrss / (1024 * 1024), 1)
-        except Exception:
-            pass
+        from superlocalmemory.core.platform_utils import get_rss_mb
+        memory_mb = round(get_rss_mb(), 1)
         return {
             "processes": processes,

package/src/superlocalmemory/server/security_middleware.py CHANGED Viewed

@@ -56,9 +56,27 @@ class SecurityHeadersMiddleware(BaseHTTPMiddleware):
         # Control referrer information leakage
         response.headers["Referrer-Policy"] = "strict-origin-when-cross-origin"
-        # Prevent caching of sensitive data (for API endpoints)
-        if request.url.path.startswith("/api/"):
+        # v3.4.23: Cache-Control strategy
+        # ---------------------------------------------------------------
+        # Three classes of paths, three policies:
+        #
+        #   /api/*        -> no-store (sensitive data, never cache)
+        #   index.html    -> no-cache, must-revalidate (always revalidate)
+        #   /static/*     -> no-cache, must-revalidate (always revalidate
+        #                    with ETag; fast reloads but never stale-after-
+        #                    upgrade)
+        #
+        # Before v3.4.23 only /api/* had cache headers. Browsers then cached
+        # JS/CSS/HTML aggressively via default heuristics, and after a daemon
+        # upgrade the dashboard showed an infinite spinner because old cached
+        # JS was calling endpoints with stale response shapes. "no-cache"
+        # (not "no-store") still allows 304s on unchanged files, so reload
+        # cost stays low.
+        path = request.url.path
+        if path.startswith("/api/"):
             response.headers["Cache-Control"] = "no-store, no-cache, must-revalidate"
             response.headers["Pragma"] = "no-cache"
+        elif path == "/" or path.endswith(".html") or path.startswith("/static/"):
+            response.headers["Cache-Control"] = "no-cache, must-revalidate"
         return response

package/src/superlocalmemory/server/unified_daemon.py CHANGED Viewed

@@ -495,9 +495,20 @@ async def lifespan(application: FastAPI):
     global _start_time
     _start_time = time.monotonic()
     _last_activity = time.monotonic()
-    logger.info("Unified daemon ready on port %d (24/7 mode)" if idle_timeout <= 0
-                else "Unified daemon ready on port %d (idle timeout: %ds)",
-                _DEFAULT_PORT, idle_timeout)
+    # v3.4.23: pre-format the ready message. Previous code passed a ternary as
+    # the log format string with a fixed 2-arg tuple; when idle_timeout<=0 the
+    # chosen branch had only one %d, triggering a TypeError on every startup.
+    # Python's logging module then wrote the full stack to stderr. Because the
+    # call runs inside FastAPI's stacked merged_lifespan, each dump was ~30 KB
+    # and the error log grew to tens of MB within a day.
+    if idle_timeout <= 0:
+        _ready_msg = f"Unified daemon ready on port {_DEFAULT_PORT} (24/7 mode)"
+    else:
+        _ready_msg = (
+            f"Unified daemon ready on port {_DEFAULT_PORT} "
+            f"(idle timeout: {idle_timeout}s)"
+        )
+    logger.info(_ready_msg)
     yield
@@ -850,7 +861,18 @@ def _register_dashboard_routes(application: FastAPI) -> None:
     _data_io_mod.ws_manager = ws_manager
     # Root page
-    from fastapi.responses import HTMLResponse
+    from fastapi.responses import HTMLResponse, JSONResponse
+    # v3.4.23: /api/version — dashboard polls this to detect daemon upgrades
+    # and auto-reload stale tabs (see ui/js/core.js::checkVersionFingerprint).
+    try:
+        from superlocalmemory import __version__ as _SLM_VERSION
+    except Exception:  # pragma: no cover — defensive
+        _SLM_VERSION = "unknown"
+    @application.get("/api/version")
+    async def api_version():
+        return JSONResponse({"version": _SLM_VERSION})
     @application.get("/", response_class=HTMLResponse)
     async def root():
@@ -863,7 +885,11 @@ def _register_dashboard_routes(application: FastAPI) -> None:
                 "<p><a href='/docs'>API Documentation</a></p>"
                 "</body></html>"
             )
-        return index_path.read_text()
+        # v3.4.23: substitute version placeholder so the dashboard can detect
+        # upgrades and auto-reload. Read fresh each request (daemon uptime is
+        # days, but we want zero caching surprises during development).
+        html = index_path.read_text()
+        return html.replace("__SLM_VERSION__", _SLM_VERSION)
     # Startup event for event listener
     @application.on_event("startup")
@@ -1066,6 +1092,13 @@ def start_server(port: int = _DEFAULT_PORT) -> None:
     global _start_time
     import uvicorn
+    # v3.4.23: rotate oversized logs before anything else so both the CLI
+    # path (`slm serve`) and the LaunchAgent path (__main__) are covered.
+    try:
+        rotate_oversized_logs()
+    except Exception:
+        pass  # never block startup on log housekeeping
     _PID_FILE.parent.mkdir(parents=True, exist_ok=True)
     _PID_FILE.write_text(str(os.getpid()))
     _PORT_FILE.write_text(str(port))
@@ -1094,11 +1127,80 @@ def start_server(port: int = _DEFAULT_PORT) -> None:
         _PORT_FILE.unlink(missing_ok=True)
+# ---------------------------------------------------------------------------
+# v3.4.23 — Startup log rotation
+# ---------------------------------------------------------------------------
+# The LaunchAgent plist redirects stdout/stderr to daemon.log and
+# daemon-error.log. Those files are managed by launchd, not Python, so
+# Python's RotatingFileHandler cannot prune them. If any bug ever writes
+# large amounts of data to stderr (the v3.4.22 logger-format bug produced
+# ~30 KB per startup and the file grew to 69 MB), end users end up with a
+# disk-eating log they never knew existed.
+#
+# rotate_oversized_logs() is a belt-and-suspenders guard: every time the
+# daemon starts, if either log exceeds MAX_LOG_BYTES we rename the current
+# file to ".1" (keeping one rotated copy) and truncate the original so
+# launchd's open file descriptor keeps working. This is cheap, stateless,
+# and independent of whatever caused the overflow.
+# ---------------------------------------------------------------------------
+_MAX_LOG_BYTES = 10 * 1024 * 1024  # 10 MB
+def rotate_oversized_logs(log_dir: Optional[Path] = None,
+                          max_bytes: int = _MAX_LOG_BYTES) -> None:
+    """Rotate daemon.log and daemon-error.log at startup if oversized.
+    Keeps one rotated copy (.1). Safe under concurrent start attempts:
+    rename is atomic on POSIX, and truncation is idempotent.
+    """
+    log_dir = log_dir or (Path.home() / ".superlocalmemory" / "logs")
+    try:
+        log_dir.mkdir(parents=True, exist_ok=True)
+    except Exception:
+        return
+    for name in ("daemon.log", "daemon-error.log", "daemon.json.log"):
+        path = log_dir / name
+        try:
+            if not path.exists() or path.stat().st_size <= max_bytes:
+                continue
+            rotated = log_dir / f"{name}.1"
+            try:
+                if rotated.exists():
+                    rotated.unlink()
+            except Exception:
+                pass
+            try:
+                path.rename(rotated)
+            except Exception:
+                # If rename fails (e.g., file is the open stderr fd under
+                # launchd), fall back to truncation so we at least reclaim
+                # disk without breaking the redirect.
+                try:
+                    with open(path, "w"):
+                        pass
+                except Exception:
+                    pass
+                continue
+            # Re-create the original path as empty so launchd's redirect
+            # keeps appending to a fresh file.
+            try:
+                path.touch()
+            except Exception:
+                pass
+        except Exception:
+            # Log rotation must never prevent daemon startup.
+            continue
 # ---------------------------------------------------------------------------
 # CLI entry point
 # ---------------------------------------------------------------------------
 if __name__ == "__main__":
+    # Rotate first, then configure logging, so the first log line lands in a
+    # freshly-sized file.
+    rotate_oversized_logs()
     logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s")
     port = _DEFAULT_PORT
     for arg in sys.argv:

package/src/superlocalmemory/ui/index.html CHANGED Viewed

@@ -3,6 +3,10 @@
 <head>
     <meta charset="UTF-8">
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <!-- v3.4.23: server substitutes __SLM_VERSION__ at serve time. core.js
+         compares this to /api/version and hard-reloads + clears localStorage
+         on mismatch, so the browser cannot show stale UI after an upgrade. -->
+    <meta name="slm-version" content="__SLM_VERSION__">
     <title>SuperLocalMemory V3 — Dashboard</title>
     <!-- Bootstrap CSS (vendored locally v3.4.21 — no CDN calls, works offline) -->
@@ -1003,8 +1007,53 @@
                             </div>
                         </div>
+                        <!-- Step 3: Embedding Configuration (V3.4.24) -->
+                        <div class="mt-3 pt-3 border-top" id="settings-embedding-panel">
+                            <h6 class="text-muted"><i class="bi bi-cpu"></i> Step 3: Embedding Model</h6>
+                            <p class="small text-muted mb-2">
+                                Controls how text is converted to vectors for semantic search.
+                                Default: local model (768d). Custom: any OpenAI-compatible endpoint.
+                            </p>
+                            <div class="row g-2 mb-2">
+                                <div class="col-md-4">
+                                    <label class="form-label small">Embedding Provider</label>
+                                    <select class="form-select form-select-sm" id="settings-emb-provider">
+                                        <option value="default">Default (Local Model)</option>
+                                        <option value="openai">Custom Endpoint (OpenAI-compatible)</option>
+                                    </select>
+                                </div>
+                                <div class="col-md-4" id="settings-emb-model-col" style="display:none;">
+                                    <label class="form-label small">Model Name</label>
+                                    <input type="text" id="settings-emb-model" class="form-control form-control-sm" placeholder="e.g. Qwen3-Embedding">
+                                </div>
+                                <div class="col-md-4" id="settings-emb-dim-col" style="display:none;">
+                                    <label class="form-label small">Dimension</label>
+                                    <input type="number" id="settings-emb-dimension" class="form-control form-control-sm" placeholder="e.g. 1024" min="64" max="8192">
+                                </div>
+                            </div>
+                            <div class="row g-2 mb-2" id="settings-emb-endpoint-row" style="display:none;">
+                                <div class="col-md-8">
+                                    <label class="form-label small">Embedding Endpoint</label>
+                                    <input type="text" id="settings-emb-endpoint" class="form-control form-control-sm" placeholder="http://localhost:8045/v1/embeddings">
+                                </div>
+                                <div class="col-md-4">
+                                    <label class="form-label small">API Key (optional)</label>
+                                    <input type="password" id="settings-emb-key" class="form-control form-control-sm" placeholder="not-needed">
+                                </div>
+                            </div>
+                            <div id="settings-emb-test-row" style="display:none;">
+                                <button class="btn btn-sm btn-outline-info" id="settings-emb-test-btn">
+                                    <i class="bi bi-lightning"></i> Test Embedding
+                                </button>
+                                <span id="settings-emb-test-result" class="ms-2 small"></span>
+                            </div>
+                            <div id="settings-emb-info" class="small text-muted mt-1">
+                                Using local <strong>nomic-embed-text-v1.5</strong> (768d)
+                            </div>
+                        </div>
                         <!-- Save button -->
-                        <div class="mt-2">
+                        <div class="mt-3">
                             <button class="btn btn-primary" id="settings-save-all">
                                 <i class="bi bi-check-circle"></i> Save Configuration
                             </button>