npm - superlocalmemory - Versions diffs - 3.4.23 → 3.4.24 - Mend

superlocalmemory 3.4.23 → 3.4.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21)

package/package.json +1 -1
package/pyproject.toml +1 -1
package/src/superlocalmemory/__init__.py +1 -1
package/src/superlocalmemory/core/config.py +66 -18
package/src/superlocalmemory/core/embedding_worker.py +8 -27
package/src/superlocalmemory/core/embeddings.py +83 -1
package/src/superlocalmemory/core/engine_wiring.py +8 -0
package/src/superlocalmemory/core/platform_utils.py +127 -0
package/src/superlocalmemory/core/recall_worker.py +8 -24
package/src/superlocalmemory/core/reranker_worker.py +8 -24
package/src/superlocalmemory/core/worker_pool.py +2 -1
package/src/superlocalmemory/retrieval/reranker.py +2 -1
package/src/superlocalmemory/server/routes/v3_api.py +150 -8
package/src/superlocalmemory/ui/index.html +46 -1
package/src/superlocalmemory/ui/js/auto-settings.js +131 -5
package/src/superlocalmemory.egg-info/PKG-INFO +0 -655
package/src/superlocalmemory.egg-info/SOURCES.txt +0 -426
package/src/superlocalmemory.egg-info/dependency_links.txt +0 -1
package/src/superlocalmemory.egg-info/entry_points.txt +0 -2
package/src/superlocalmemory.egg-info/requires.txt +0 -58
package/src/superlocalmemory.egg-info/top_level.txt +0 -1

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "superlocalmemory",
-  "version": "3.4.23",
+  "version": "3.4.24",
   "description": "Information-geometric agent memory with mathematical guarantees. 4-channel retrieval, Fisher-Rao similarity, zero-LLM mode, EU AI Act compliant. Works with Claude, Cursor, Windsurf, and 17+ AI tools.",
   "keywords": [
     "ai-memory",

package/pyproject.toml CHANGED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "superlocalmemory"
-version = "3.4.23"
+version = "3.4.24"
 description = "Information-geometric agent memory with mathematical guarantees"
 readme = "README.md"
 license = {text = "AGPL-3.0-or-later"}

package/src/superlocalmemory/__init__.py CHANGED Viewed

@@ -1,3 +1,3 @@
 """SuperLocalMemory — information-geometric agent memory."""
-__version__ = "3.4.23"
+__version__ = "3.4.24"

package/src/superlocalmemory/core/config.py CHANGED Viewed

@@ -37,7 +37,8 @@ class EmbeddingConfig:
     model_name: str = "nomic-ai/nomic-embed-text-v1.5"
     dimension: int = 768
-    # Provider: "" = auto-detect, "sentence-transformers", "ollama", "cloud"
+    # Provider: "" = auto-detect, "sentence-transformers", "ollama", "cloud",
+    # "openai" (V3.4.24: any OpenAI-compatible /v1/embeddings endpoint)
     provider: str = ""
     # Ollama settings (used when provider="ollama" or auto-detected)
     ollama_model: str = "nomic-embed-text"
@@ -50,12 +51,19 @@ class EmbeddingConfig:
     @property
     def is_cloud(self) -> bool:
+        if self.provider == "openai":
+            return False
         return bool(self.api_endpoint) or self.provider == "cloud"
     @property
     def is_ollama(self) -> bool:
         return self.provider == "ollama"
+    @property
+    def is_openai_compatible(self) -> bool:
+        """V3.4.24: True when using a custom OpenAI-compatible endpoint."""
+        return self.provider == "openai" and bool(self.api_endpoint)
 # ---------------------------------------------------------------------------
 # LLM Config
@@ -639,6 +647,8 @@ class SLMConfig:
             embedding_endpoint=emb_data.get("api_endpoint", ""),
             embedding_key=emb_data.get("api_key", ""),
             embedding_deployment=emb_data.get("deployment_name", ""),
+            embedding_model_name=emb_data.get("model_name", ""),
+            embedding_dimension=int(emb_data.get("dimension", 0) or 0),
         )
         config.active_profile = data.get("active_profile", "default")
@@ -787,20 +797,34 @@ class SLMConfig:
         embedding_endpoint: str = "",
         embedding_key: str = "",
         embedding_deployment: str = "",
+        embedding_model_name: str = "",
+        embedding_dimension: int = 0,
     ) -> SLMConfig:
         """Create config with mode-appropriate defaults."""
         _base = base_dir or DEFAULT_BASE_DIR
         if mode == Mode.A:
+            # V3.4.24: If user chose "openai" provider, honour their custom
+            # endpoint/model/dimension. Otherwise use local defaults.
+            _a_provider = embedding_provider or "sentence-transformers"
+            if _a_provider == "openai" and embedding_endpoint:
+                _a_emb = EmbeddingConfig(
+                    model_name=embedding_model_name or "nomic-ai/nomic-embed-text-v1.5",
+                    dimension=embedding_dimension or 768,
+                    provider="openai",
+                    api_endpoint=embedding_endpoint,
+                    api_key=embedding_key,
+                )
+            else:
+                _a_emb = EmbeddingConfig(
+                    model_name="nomic-ai/nomic-embed-text-v1.5",
+                    dimension=768,
+                    provider=_a_provider,
+                )
             return cls(
                 mode=mode,
                 base_dir=_base,
-                embedding=EmbeddingConfig(
-                    model_name="nomic-ai/nomic-embed-text-v1.5",
-                    dimension=768,
-                    # Mode A: sentence-transformers in SUBPROCESS (never in-process)
-                    provider=embedding_provider or "sentence-transformers",
-                ),
+                embedding=_a_emb,
                 llm=LLMConfig(),  # No LLM
                 retrieval=RetrievalConfig(
                     # V3.3.2: ONNX cross-encoder enabled for all modes (~200MB)
@@ -816,15 +840,27 @@ class SLMConfig:
             )
         if mode == Mode.B:
+            # V3.4.24: If user chose "openai" provider with a custom endpoint
+            # (e.g. local vLLM, LiteLLM, Ollama /v1), honour it.
+            _b_provider = embedding_provider or "ollama"
+            if _b_provider == "openai" and embedding_endpoint:
+                _b_emb = EmbeddingConfig(
+                    model_name=embedding_model_name or "nomic-ai/nomic-embed-text-v1.5",
+                    dimension=embedding_dimension or 768,
+                    provider="openai",
+                    api_endpoint=embedding_endpoint,
+                    api_key=embedding_key,
+                )
+            else:
+                _b_emb = EmbeddingConfig(
+                    model_name="nomic-ai/nomic-embed-text-v1.5",
+                    dimension=768,
+                    provider=_b_provider,
+                )
             return cls(
                 mode=mode,
                 base_dir=_base,
-                embedding=EmbeddingConfig(
-                    model_name="nomic-ai/nomic-embed-text-v1.5",
-                    dimension=768,
-                    # Mode B: Ollama HTTP API (zero PyTorch in-process)
-                    provider=embedding_provider or "ollama",
-                ),
+                embedding=_b_emb,
                 llm=LLMConfig(
                     provider=llm_provider or "ollama",
                     model=llm_model or "llama3.2",
@@ -841,16 +877,28 @@ class SLMConfig:
         # Don't carry over local-only providers (ollama) to cloud mode
         c_provider = llm_provider if llm_provider not in ("ollama", "") else "openrouter"
         c_model = llm_model if llm_provider not in ("ollama", "") else "anthropic/claude-sonnet-4"
-        return cls(
-            mode=mode,
-            base_dir=_base,
-            embedding=EmbeddingConfig(
+        # V3.4.24: If user chose "openai" provider, honour it in Mode C too.
+        _c_emb_provider = embedding_provider or ""
+        if _c_emb_provider == "openai" and embedding_endpoint:
+            _c_emb = EmbeddingConfig(
+                model_name=embedding_model_name or "text-embedding-3-large",
+                dimension=embedding_dimension or 3072,
+                provider="openai",
+                api_endpoint=embedding_endpoint,
+                api_key=embedding_key,
+            )
+        else:
+            _c_emb = EmbeddingConfig(
                 model_name="text-embedding-3-large",
                 dimension=3072,
                 api_endpoint=embedding_endpoint,
                 api_key=embedding_key,
                 deployment_name=embedding_deployment,
-            ),
+            )
+        return cls(
+            mode=mode,
+            base_dir=_base,
+            embedding=_c_emb,
             llm=LLMConfig(
                 provider=c_provider,
                 model=c_model,

package/src/superlocalmemory/core/embedding_worker.py CHANGED Viewed

@@ -26,7 +26,6 @@ import json
 import os
 import signal
 import sys
-import threading
 # Force CPU BEFORE any torch import
 os.environ["CUDA_VISIBLE_DEVICES"] = ""
@@ -47,24 +46,10 @@ if sys.platform != "win32":
 def _start_parent_watchdog() -> None:
     """Monitor parent process — self-terminate if parent dies.
-    Prevents orphaned workers that consume 500-800 MB each when the parent
-    process crashes, is killed, or exits without cleanup.
-    V3.3.7: Added after incident where orphaned workers consumed 33 GB.
+    V3.4.24: Delegates to platform_utils.start_parent_watchdog().
     """
-    parent_pid = os.getppid()
-    def _watch() -> None:
-        import time
-        while True:
-            time.sleep(5)
-            try:
-                os.kill(parent_pid, 0)
-            except OSError:
-                os._exit(0)
-    t = threading.Thread(target=_watch, daemon=True, name="parent-watchdog")
-    t.start()
+    from superlocalmemory.core.platform_utils import start_parent_watchdog
+    start_parent_watchdog()
 def _load_embedding_model(name: str) -> tuple:
@@ -97,9 +82,10 @@ def _load_embedding_model(name: str) -> tuple:
 def _worker_main() -> None:
     """Main loop: read JSON requests from stdin, write responses to stdout."""
-    _start_parent_watchdog()  # V3.3.7: self-terminate if parent dies
+    _start_parent_watchdog()
     import numpy as np
+    from superlocalmemory.core.platform_utils import get_rss_mb
     model = None
     model_name = None
@@ -164,15 +150,10 @@ def _worker_main() -> None:
             except Exception as exc:
                 _respond({"ok": False, "error": str(exc)})
-            # V3.3.16: RSS watchdog — self-terminate if memory exceeds limit.
-            # PyTorch on ARM64 Mac never returns memory to OS. After ~200 embeds
-            # a worker that started at 300MB grows to 17GB+. Parent auto-respawns
-            # a fresh worker on next request (existing mechanism in embeddings.py).
-            # V3.3.21: Configurable via SLM_EMBED_WORKER_RSS_LIMIT_MB (default 2500MB).
-            import resource
+            # V3.3.16: RSS watchdog — V3.4.24: cross-platform via platform_utils.
             _rss_limit = int(os.environ.get("SLM_EMBED_WORKER_RSS_LIMIT_MB", 4000))
-            rss_mb = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024 / 1024
-            if rss_mb > _rss_limit:
+            rss_mb = get_rss_mb()
+            if rss_mb > 0 and rss_mb > _rss_limit:
                 sys.exit(0)
             continue

package/src/superlocalmemory/core/embeddings.py CHANGED Viewed

@@ -178,6 +178,7 @@ class EmbeddingService:
         self._idle_timer: threading.Timer | None = None
         self._worker_ready = False
         self._request_count: int = 0
+        self._http_client: object | None = None
         # Register for atexit cleanup (prevent orphaned workers)
         ref = weakref.ref(self, _live_embedding_services.discard)
@@ -189,10 +190,17 @@ class EmbeddingService:
             self._kill_worker()
         except Exception:
             pass
+        try:
+            if self._http_client is not None:
+                self._http_client.close()
+        except Exception:
+            pass
     @property
     def is_available(self) -> bool:
         """Check if embedding service can produce embeddings."""
+        if self._config.is_openai_compatible:
+            return bool(self._config.api_endpoint)
         if self._config.is_cloud:
             return bool(self._config.api_endpoint and self._config.api_key)
         return self._available
@@ -215,6 +223,11 @@ class EmbeddingService:
         """Embed a single text string. Returns list of floats or None."""
         if not text or not text.strip():
             raise ValueError("Cannot embed empty text")
+        if self._config.is_openai_compatible:
+            vecs = self._openai_compatible_embed_batch([text])
+            vec = vecs[0]
+            self._validate_dimension(np.asarray(vec))
+            return vec
         if self._config.is_cloud:
             return self._cloud_embed_single(text)
         result = self._subprocess_embed([text])
@@ -228,6 +241,12 @@ class EmbeddingService:
         """Embed a batch of texts."""
         if not texts:
             raise ValueError("Cannot embed empty batch")
+        if self._config.is_openai_compatible:
+            results = self._openai_compatible_embed_batch(texts)
+            for vec in results:
+                if vec is not None:
+                    self._validate_dimension(np.asarray(vec))
+            return results
         if self._config.is_cloud:
             return self._cloud_embed_batch(texts)
         result = self._subprocess_embed(texts)
@@ -458,6 +477,7 @@ class EmbeddingService:
                 "TOKENIZERS_PARALLELISM": "false",
                 "TORCH_DEVICE": "cpu",
             }
+            from superlocalmemory.core.platform_utils import popen_platform_kwargs
             self._worker_proc = subprocess.Popen(
                 [sys.executable, "-m", worker_module],
                 stdin=subprocess.PIPE,
@@ -466,7 +486,7 @@ class EmbeddingService:
                 text=True,
                 bufsize=1,
                 env=env,
-                start_new_session=True,
+                **popen_platform_kwargs(),
             )
             # v3.4.13: Register PID for machine-wide singleton guard
             register_embedding_worker_pid(self._worker_proc.pid)
@@ -511,6 +531,68 @@ class EmbeddingService:
         self._idle_timer.start()
         self._last_used = time.time()
+    # ------------------------------------------------------------------
+    # OpenAI-compatible embedding (V3.4.24 — any /v1/embeddings endpoint)
+    # ------------------------------------------------------------------
+    def _get_http_client(self):
+        """Reusable httpx client for OpenAI-compatible endpoints."""
+        if self._http_client is None:
+            import httpx
+            self._http_client = httpx.Client(
+                timeout=httpx.Timeout(connect=5.0, read=30.0, write=10.0, pool=5.0),
+            )
+        return self._http_client
+    def _openai_compatible_embed_batch(
+        self, texts: list[str], *, max_retries: int = 3,
+    ) -> list[list[float]]:
+        """Encode via any OpenAI-compatible embedding API.
+        V3.4.24: Standard ``/v1/embeddings`` format. Works with Ollama,
+        vLLM, LiteLLM, text-embeddings-inference, and any endpoint that
+        implements the OpenAI embeddings spec.
+        """
+        endpoint = self._config.api_endpoint.rstrip("/")
+        if not endpoint.endswith("/embeddings"):
+            endpoint = f"{endpoint}/embeddings"
+        headers = {"Content-Type": "application/json"}
+        if self._config.api_key:
+            headers["Authorization"] = f"Bearer {self._config.api_key}"
+        body = {
+            "input": texts,
+            "model": self._config.model_name,
+        }
+        client = self._get_http_client()
+        last_error: Exception | None = None
+        for attempt in range(max_retries):
+            try:
+                resp = client.post(endpoint, headers=headers, json=body)
+                resp.raise_for_status()
+                data = resp.json()
+                if "data" not in data or not isinstance(data["data"], list):
+                    raise ValueError(
+                        f"Unexpected response: missing 'data' array. Keys: {list(data.keys())}"
+                    )
+                results: list[list[float]] = []
+                for item in sorted(data["data"], key=lambda d: d["index"]):
+                    results.append(item["embedding"])
+                if len(results) != len(texts):
+                    logger.warning(
+                        "Embedding count mismatch: sent %d texts, got %d vectors",
+                        len(texts), len(results),
+                    )
+                return results
+            except Exception as exc:
+                last_error = exc
+                if attempt < max_retries - 1:
+                    time.sleep(2 ** attempt)
+        raise RuntimeError(
+            f"OpenAI-compatible embedding failed after {max_retries} retries: "
+            f"{last_error}"
+        )
     # ------------------------------------------------------------------
     # Cloud embedding (no subprocess needed — just HTTP)
     # ------------------------------------------------------------------

package/src/superlocalmemory/core/engine_wiring.py CHANGED Viewed

@@ -113,6 +113,14 @@ def init_embedder(config: SLMConfig) -> Any | None:
             return result
         return None
+    # --- V3.4.24: Explicit OpenAI-compatible provider ---
+    if provider == "openai" and emb_cfg.is_openai_compatible:
+        logger.info(
+            "Using OpenAI-compatible embedding endpoint: %s (model=%s, dim=%d)",
+            emb_cfg.api_endpoint, emb_cfg.model_name, emb_cfg.dimension,
+        )
+        return _try_service_embedder(EmbeddingService, emb_cfg)
     # --- Explicit cloud provider ---
     if provider == "cloud" or emb_cfg.is_cloud:
         return _try_service_embedder(EmbeddingService, emb_cfg)

package/src/superlocalmemory/core/platform_utils.py ADDED Viewed

@@ -0,0 +1,127 @@
+# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
+# Licensed under AGPL-3.0-or-later - see LICENSE file
+# Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
+"""Cross-platform utilities for subprocess management and resource monitoring.
+V3.4.24: Consolidates Windows/POSIX branching from 10+ files into one module.
+Replaces the Unix-only ``resource`` module with ``psutil`` on Windows.
+Inspired by community PR #14 (GuillaumeG / Tyrin451).
+"""
+from __future__ import annotations
+import os
+import subprocess
+import sys
+import threading
+def popen_platform_kwargs() -> dict:
+    """Platform-appropriate kwargs for subprocess.Popen.
+    POSIX: ``start_new_session=True`` — prevents terminal signals bleeding.
+    Windows: ``CREATE_NO_WINDOW`` — prevents console window popup.
+    """
+    if sys.platform == "win32":
+        # CREATE_NO_WINDOW = 0x08000000 — only defined on Windows.
+        flag = getattr(subprocess, "CREATE_NO_WINDOW", 0x08000000)
+        return {"creationflags": flag}
+    return {"start_new_session": True}
+def get_rss_mb() -> float:
+    """Current process RSS in megabytes.
+    POSIX: ``resource.getrusage`` (stdlib). Windows: ``psutil``.
+    Returns 0.0 if measurement is unavailable.
+    """
+    if sys.platform != "win32":
+        try:
+            import resource
+            ru_maxrss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
+            if sys.platform == "darwin":
+                return ru_maxrss / 1024 / 1024  # macOS: bytes
+            return ru_maxrss / 1024  # Linux: kilobytes
+        except Exception:
+            return 0.0
+    try:
+        import psutil
+        return psutil.Process().memory_info().rss / 1024 / 1024
+    except Exception:
+        return 0.0
+def is_pid_alive(pid: int) -> bool:
+    """Check whether a process with *pid* is alive.
+    POSIX: ``os.kill(pid, 0)`` — signal 0 checks existence.
+    Windows: ``psutil.pid_exists()`` with ``os.kill`` fallback.
+    """
+    if pid <= 0:
+        return False
+    if sys.platform != "win32":
+        try:
+            os.kill(pid, 0)
+            return True
+        except OSError:
+            return False
+    try:
+        import psutil
+        return psutil.pid_exists(pid)
+    except ImportError:
+        try:
+            os.kill(pid, 0)
+            return True
+        except OSError:
+            return False
+def kill_process(pid: int) -> bool:
+    """Send SIGTERM (POSIX) or taskkill /F /T (Windows).
+    Returns True if the signal was sent successfully.
+    """
+    if pid <= 0:
+        return False
+    if sys.platform == "win32":
+        try:
+            subprocess.call(
+                ["taskkill", "/F", "/T", "/PID", str(pid)],
+                stdout=subprocess.DEVNULL,
+                stderr=subprocess.DEVNULL,
+            )
+            return True
+        except Exception:
+            return False
+    try:
+        import signal
+        os.kill(pid, signal.SIGTERM)
+        return True
+    except OSError:
+        return False
+def start_parent_watchdog() -> None:
+    """Self-terminate when the parent process dies.
+    Prevents orphaned workers (500+ MB each) after parent crash/kill.
+    V3.3.7 origin: 33 GB consumed by orphaned workers.
+    V3.4.24: Consolidated from 3 separate worker files.
+    """
+    try:
+        parent_pid = os.getppid()
+    except AttributeError:
+        return
+    if parent_pid <= 1:
+        return
+    def _watch() -> None:
+        import time
+        while True:
+            time.sleep(5)
+            if not is_pid_alive(parent_pid):
+                os._exit(0)
+    t = threading.Thread(target=_watch, daemon=True, name="parent-watchdog")
+    t.start()

package/src/superlocalmemory/core/recall_worker.py CHANGED Viewed

@@ -20,7 +20,6 @@ import json
 import os
 import signal
 import sys
-import threading
 # Force CPU BEFORE any torch import
 os.environ["CUDA_VISIBLE_DEVICES"] = ""
@@ -39,24 +38,10 @@ if sys.platform != "win32":
 def _start_parent_watchdog() -> None:
     """Monitor parent process — self-terminate if parent dies.
-    Prevents orphaned workers that consume 500+ MB each when the parent
-    process crashes, is killed, or exits without cleanup.
-    V3.3.7: Added after incident where orphaned workers consumed 33 GB.
+    V3.4.24: Delegates to platform_utils.start_parent_watchdog().
     """
-    parent_pid = os.getppid()
-    def _watch() -> None:
-        import time
-        while True:
-            time.sleep(5)
-            try:
-                os.kill(parent_pid, 0)
-            except OSError:
-                os._exit(0)
-    t = threading.Thread(target=_watch, daemon=True, name="parent-watchdog")
-    t.start()
+    from superlocalmemory.core.platform_utils import start_parent_watchdog
+    start_parent_watchdog()
 _engine = None
@@ -253,7 +238,8 @@ def _handle_status() -> dict:
 def _worker_main() -> None:
     """Main loop: read JSON requests from stdin, write responses to stdout."""
-    _start_parent_watchdog()  # V3.3.7: self-terminate if parent dies
+    _start_parent_watchdog()
+    from superlocalmemory.core.platform_utils import get_rss_mb
     for line in sys.stdin:
         line = line.strip()
@@ -326,11 +312,9 @@ def _worker_main() -> None:
         except Exception as exc:
             _respond({"ok": False, "error": str(exc)})
-        # V3.3.16: RSS watchdog — self-terminate if memory exceeds 1.5GB.
-        # Parent auto-respawns a fresh worker on next request.
-        import resource
-        rss_mb = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024 / 1024
-        if rss_mb > 2500:
+        # V3.3.16: RSS watchdog — V3.4.24: cross-platform via platform_utils.
+        rss_mb = get_rss_mb()
+        if rss_mb > 0 and rss_mb > 2500:
             sys.exit(0)

package/src/superlocalmemory/core/reranker_worker.py CHANGED Viewed

@@ -31,7 +31,6 @@ import platform
 import signal
 import struct
 import sys
-import threading
 # Force CPU BEFORE any torch import
 os.environ["CUDA_VISIBLE_DEVICES"] = ""
@@ -52,25 +51,10 @@ if sys.platform != "win32":
 def _start_parent_watchdog() -> None:
     """Monitor parent process — self-terminate if parent dies.
-    Prevents orphaned workers that consume 1+ GB each when the parent
-    process crashes, is killed, or exits without cleanup.
-    V3.3.7: Added after incident where ~30 orphaned workers consumed 33 GB.
+    V3.4.24: Delegates to platform_utils.start_parent_watchdog().
     """
-    parent_pid = os.getppid()
-    def _watch() -> None:
-        import time
-        while True:
-            time.sleep(5)
-            try:
-                os.kill(parent_pid, 0)  # Check if parent is alive (signal 0)
-            except OSError:
-                # Parent is dead — self-terminate
-                os._exit(0)
-    t = threading.Thread(target=_watch, daemon=True, name="parent-watchdog")
-    t.start()
+    from superlocalmemory.core.platform_utils import start_parent_watchdog
+    start_parent_watchdog()
 def _detect_onnx_variant(model_name: str = "") -> str:
@@ -101,7 +85,8 @@ def _detect_onnx_variant(model_name: str = "") -> str:
 def _worker_main() -> None:
     """Main loop: read JSON requests from stdin, write responses to stdout."""
-    _start_parent_watchdog()  # V3.3.7: self-terminate if parent dies
+    _start_parent_watchdog()
+    from superlocalmemory.core.platform_utils import get_rss_mb
     model = None
     active_backend = ""
@@ -194,10 +179,9 @@ def _worker_main() -> None:
             except Exception as exc:
                 _respond({"ok": False, "error": str(exc)})
-            # V3.3.16: RSS watchdog — same as embedding_worker
-            import resource
-            rss_mb = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024 / 1024
-            if rss_mb > 2500:
+            # V3.3.16: RSS watchdog — V3.4.24: cross-platform via platform_utils.
+            rss_mb = get_rss_mb()
+            if rss_mb > 0 and rss_mb > 2500:
                 sys.exit(0)
             continue

package/src/superlocalmemory/core/worker_pool.py CHANGED Viewed

@@ -247,6 +247,7 @@ class WorkerPool:
                 "TOKENIZERS_PARALLELISM": "false",
                 "TORCH_DEVICE": "cpu",
             }
+            from superlocalmemory.core.platform_utils import popen_platform_kwargs
             self._proc = subprocess.Popen(
                 [sys.executable, "-m", "superlocalmemory.core.recall_worker"],
                 stdin=subprocess.PIPE,
@@ -255,7 +256,7 @@ class WorkerPool:
                 text=True,
                 bufsize=1,
                 env=env,
-                start_new_session=True,  # Prevent terminal signals bleeding to worker
+                **popen_platform_kwargs(),
             )
             logger.info("Recall worker spawned (PID %d)", self._proc.pid)
         except Exception as exc:

package/src/superlocalmemory/retrieval/reranker.py CHANGED Viewed

@@ -193,6 +193,7 @@ class CrossEncoderReranker:
                 "TOKENIZERS_PARALLELISM": "false",
                 "TORCH_DEVICE": "cpu",
             }
+            from superlocalmemory.core.platform_utils import popen_platform_kwargs
             self._worker_proc = subprocess.Popen(
                 [sys.executable, "-m", worker_module],
                 stdin=subprocess.PIPE,
@@ -201,7 +202,7 @@ class CrossEncoderReranker:
                 text=True,
                 bufsize=1,
                 env=env,
-                start_new_session=True,
+                **popen_platform_kwargs(),
             )
             # v3.4.13: Register PID for machine-wide singleton
             _RERANKER_PID_FILE.parent.mkdir(parents=True, exist_ok=True)