superlocalmemory 3.4.23 → 3.4.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "superlocalmemory",
3
- "version": "3.4.23",
3
+ "version": "3.4.24",
4
4
  "description": "Information-geometric agent memory with mathematical guarantees. 4-channel retrieval, Fisher-Rao similarity, zero-LLM mode, EU AI Act compliant. Works with Claude, Cursor, Windsurf, and 17+ AI tools.",
5
5
  "keywords": [
6
6
  "ai-memory",
package/pyproject.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "superlocalmemory"
3
- version = "3.4.23"
3
+ version = "3.4.24"
4
4
  description = "Information-geometric agent memory with mathematical guarantees"
5
5
  readme = "README.md"
6
6
  license = {text = "AGPL-3.0-or-later"}
@@ -1,3 +1,3 @@
1
1
  """SuperLocalMemory — information-geometric agent memory."""
2
2
 
3
- __version__ = "3.4.23"
3
+ __version__ = "3.4.24"
@@ -37,7 +37,8 @@ class EmbeddingConfig:
37
37
 
38
38
  model_name: str = "nomic-ai/nomic-embed-text-v1.5"
39
39
  dimension: int = 768
40
- # Provider: "" = auto-detect, "sentence-transformers", "ollama", "cloud"
40
+ # Provider: "" = auto-detect, "sentence-transformers", "ollama", "cloud",
41
+ # "openai" (V3.4.24: any OpenAI-compatible /v1/embeddings endpoint)
41
42
  provider: str = ""
42
43
  # Ollama settings (used when provider="ollama" or auto-detected)
43
44
  ollama_model: str = "nomic-embed-text"
@@ -50,12 +51,19 @@ class EmbeddingConfig:
50
51
 
51
52
  @property
52
53
  def is_cloud(self) -> bool:
54
+ if self.provider == "openai":
55
+ return False
53
56
  return bool(self.api_endpoint) or self.provider == "cloud"
54
57
 
55
58
  @property
56
59
  def is_ollama(self) -> bool:
57
60
  return self.provider == "ollama"
58
61
 
62
@property
def is_openai_compatible(self) -> bool:
    """Whether embeddings go to a user-supplied OpenAI-style endpoint.

    True only when ``provider`` is exactly ``"openai"`` and
    ``api_endpoint`` is non-empty (V3.4.24).
    """
    if self.provider != "openai":
        return False
    return bool(self.api_endpoint)
66
+
59
67
 
60
68
  # ---------------------------------------------------------------------------
61
69
  # LLM Config
@@ -639,6 +647,8 @@ class SLMConfig:
639
647
  embedding_endpoint=emb_data.get("api_endpoint", ""),
640
648
  embedding_key=emb_data.get("api_key", ""),
641
649
  embedding_deployment=emb_data.get("deployment_name", ""),
650
+ embedding_model_name=emb_data.get("model_name", ""),
651
+ embedding_dimension=int(emb_data.get("dimension", 0) or 0),
642
652
  )
643
653
  config.active_profile = data.get("active_profile", "default")
644
654
 
@@ -787,20 +797,34 @@ class SLMConfig:
787
797
  embedding_endpoint: str = "",
788
798
  embedding_key: str = "",
789
799
  embedding_deployment: str = "",
800
+ embedding_model_name: str = "",
801
+ embedding_dimension: int = 0,
790
802
  ) -> SLMConfig:
791
803
  """Create config with mode-appropriate defaults."""
792
804
  _base = base_dir or DEFAULT_BASE_DIR
793
805
 
794
806
  if mode == Mode.A:
807
+ # V3.4.24: If user chose "openai" provider, honour their custom
808
+ # endpoint/model/dimension. Otherwise use local defaults.
809
+ _a_provider = embedding_provider or "sentence-transformers"
810
+ if _a_provider == "openai" and embedding_endpoint:
811
+ _a_emb = EmbeddingConfig(
812
+ model_name=embedding_model_name or "nomic-ai/nomic-embed-text-v1.5",
813
+ dimension=embedding_dimension or 768,
814
+ provider="openai",
815
+ api_endpoint=embedding_endpoint,
816
+ api_key=embedding_key,
817
+ )
818
+ else:
819
+ _a_emb = EmbeddingConfig(
820
+ model_name="nomic-ai/nomic-embed-text-v1.5",
821
+ dimension=768,
822
+ provider=_a_provider,
823
+ )
795
824
  return cls(
796
825
  mode=mode,
797
826
  base_dir=_base,
798
- embedding=EmbeddingConfig(
799
- model_name="nomic-ai/nomic-embed-text-v1.5",
800
- dimension=768,
801
- # Mode A: sentence-transformers in SUBPROCESS (never in-process)
802
- provider=embedding_provider or "sentence-transformers",
803
- ),
827
+ embedding=_a_emb,
804
828
  llm=LLMConfig(), # No LLM
805
829
  retrieval=RetrievalConfig(
806
830
  # V3.3.2: ONNX cross-encoder enabled for all modes (~200MB)
@@ -816,15 +840,27 @@ class SLMConfig:
816
840
  )
817
841
 
818
842
  if mode == Mode.B:
843
+ # V3.4.24: If user chose "openai" provider with a custom endpoint
844
+ # (e.g. local vLLM, LiteLLM, Ollama /v1), honour it.
845
+ _b_provider = embedding_provider or "ollama"
846
+ if _b_provider == "openai" and embedding_endpoint:
847
+ _b_emb = EmbeddingConfig(
848
+ model_name=embedding_model_name or "nomic-ai/nomic-embed-text-v1.5",
849
+ dimension=embedding_dimension or 768,
850
+ provider="openai",
851
+ api_endpoint=embedding_endpoint,
852
+ api_key=embedding_key,
853
+ )
854
+ else:
855
+ _b_emb = EmbeddingConfig(
856
+ model_name="nomic-ai/nomic-embed-text-v1.5",
857
+ dimension=768,
858
+ provider=_b_provider,
859
+ )
819
860
  return cls(
820
861
  mode=mode,
821
862
  base_dir=_base,
822
- embedding=EmbeddingConfig(
823
- model_name="nomic-ai/nomic-embed-text-v1.5",
824
- dimension=768,
825
- # Mode B: Ollama HTTP API (zero PyTorch in-process)
826
- provider=embedding_provider or "ollama",
827
- ),
863
+ embedding=_b_emb,
828
864
  llm=LLMConfig(
829
865
  provider=llm_provider or "ollama",
830
866
  model=llm_model or "llama3.2",
@@ -841,16 +877,28 @@ class SLMConfig:
841
877
  # Don't carry over local-only providers (ollama) to cloud mode
842
878
  c_provider = llm_provider if llm_provider not in ("ollama", "") else "openrouter"
843
879
  c_model = llm_model if llm_provider not in ("ollama", "") else "anthropic/claude-sonnet-4"
844
- return cls(
845
- mode=mode,
846
- base_dir=_base,
847
- embedding=EmbeddingConfig(
880
+ # V3.4.24: If user chose "openai" provider, honour it in Mode C too.
881
+ _c_emb_provider = embedding_provider or ""
882
+ if _c_emb_provider == "openai" and embedding_endpoint:
883
+ _c_emb = EmbeddingConfig(
884
+ model_name=embedding_model_name or "text-embedding-3-large",
885
+ dimension=embedding_dimension or 3072,
886
+ provider="openai",
887
+ api_endpoint=embedding_endpoint,
888
+ api_key=embedding_key,
889
+ )
890
+ else:
891
+ _c_emb = EmbeddingConfig(
848
892
  model_name="text-embedding-3-large",
849
893
  dimension=3072,
850
894
  api_endpoint=embedding_endpoint,
851
895
  api_key=embedding_key,
852
896
  deployment_name=embedding_deployment,
853
- ),
897
+ )
898
+ return cls(
899
+ mode=mode,
900
+ base_dir=_base,
901
+ embedding=_c_emb,
854
902
  llm=LLMConfig(
855
903
  provider=c_provider,
856
904
  model=c_model,
@@ -26,7 +26,6 @@ import json
26
26
  import os
27
27
  import signal
28
28
  import sys
29
- import threading
30
29
 
31
30
  # Force CPU BEFORE any torch import
32
31
  os.environ["CUDA_VISIBLE_DEVICES"] = ""
@@ -47,24 +46,10 @@ if sys.platform != "win32":
47
46
  def _start_parent_watchdog() -> None:
48
47
  """Monitor parent process — self-terminate if parent dies.
49
48
 
50
- Prevents orphaned workers that consume 500-800 MB each when the parent
51
- process crashes, is killed, or exits without cleanup.
52
-
53
- V3.3.7: Added after incident where orphaned workers consumed 33 GB.
49
+ V3.4.24: Delegates to platform_utils.start_parent_watchdog().
54
50
  """
55
- parent_pid = os.getppid()
56
-
57
- def _watch() -> None:
58
- import time
59
- while True:
60
- time.sleep(5)
61
- try:
62
- os.kill(parent_pid, 0)
63
- except OSError:
64
- os._exit(0)
65
-
66
- t = threading.Thread(target=_watch, daemon=True, name="parent-watchdog")
67
- t.start()
51
+ from superlocalmemory.core.platform_utils import start_parent_watchdog
52
+ start_parent_watchdog()
68
53
 
69
54
 
70
55
  def _load_embedding_model(name: str) -> tuple:
@@ -97,9 +82,10 @@ def _load_embedding_model(name: str) -> tuple:
97
82
 
98
83
  def _worker_main() -> None:
99
84
  """Main loop: read JSON requests from stdin, write responses to stdout."""
100
- _start_parent_watchdog() # V3.3.7: self-terminate if parent dies
85
+ _start_parent_watchdog()
101
86
 
102
87
  import numpy as np
88
+ from superlocalmemory.core.platform_utils import get_rss_mb
103
89
 
104
90
  model = None
105
91
  model_name = None
@@ -164,15 +150,10 @@ def _worker_main() -> None:
164
150
  except Exception as exc:
165
151
  _respond({"ok": False, "error": str(exc)})
166
152
 
167
- # V3.3.16: RSS watchdog — self-terminate if memory exceeds limit.
168
- # PyTorch on ARM64 Mac never returns memory to OS. After ~200 embeds
169
- # a worker that started at 300MB grows to 17GB+. Parent auto-respawns
170
- # a fresh worker on next request (existing mechanism in embeddings.py).
171
- # V3.3.21: Configurable via SLM_EMBED_WORKER_RSS_LIMIT_MB (default 2500MB).
172
- import resource
153
+ # V3.3.16: RSS watchdog — V3.4.24: cross-platform via platform_utils.
173
154
  _rss_limit = int(os.environ.get("SLM_EMBED_WORKER_RSS_LIMIT_MB", 4000))
174
- rss_mb = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024 / 1024
175
- if rss_mb > _rss_limit:
155
+ rss_mb = get_rss_mb()
156
+ if rss_mb > 0 and rss_mb > _rss_limit:
176
157
  sys.exit(0)
177
158
 
178
159
  continue
@@ -178,6 +178,7 @@ class EmbeddingService:
178
178
  self._idle_timer: threading.Timer | None = None
179
179
  self._worker_ready = False
180
180
  self._request_count: int = 0
181
+ self._http_client: object | None = None
181
182
 
182
183
  # Register for atexit cleanup (prevent orphaned workers)
183
184
  ref = weakref.ref(self, _live_embedding_services.discard)
@@ -189,10 +190,17 @@ class EmbeddingService:
189
190
  self._kill_worker()
190
191
  except Exception:
191
192
  pass
193
+ try:
194
+ if self._http_client is not None:
195
+ self._http_client.close()
196
+ except Exception:
197
+ pass
192
198
 
193
199
  @property
194
200
  def is_available(self) -> bool:
195
201
  """Check if embedding service can produce embeddings."""
202
+ if self._config.is_openai_compatible:
203
+ return bool(self._config.api_endpoint)
196
204
  if self._config.is_cloud:
197
205
  return bool(self._config.api_endpoint and self._config.api_key)
198
206
  return self._available
@@ -215,6 +223,11 @@ class EmbeddingService:
215
223
  """Embed a single text string. Returns list of floats or None."""
216
224
  if not text or not text.strip():
217
225
  raise ValueError("Cannot embed empty text")
226
+ if self._config.is_openai_compatible:
227
+ vecs = self._openai_compatible_embed_batch([text])
228
+ vec = vecs[0]
229
+ self._validate_dimension(np.asarray(vec))
230
+ return vec
218
231
  if self._config.is_cloud:
219
232
  return self._cloud_embed_single(text)
220
233
  result = self._subprocess_embed([text])
@@ -228,6 +241,12 @@ class EmbeddingService:
228
241
  """Embed a batch of texts."""
229
242
  if not texts:
230
243
  raise ValueError("Cannot embed empty batch")
244
+ if self._config.is_openai_compatible:
245
+ results = self._openai_compatible_embed_batch(texts)
246
+ for vec in results:
247
+ if vec is not None:
248
+ self._validate_dimension(np.asarray(vec))
249
+ return results
231
250
  if self._config.is_cloud:
232
251
  return self._cloud_embed_batch(texts)
233
252
  result = self._subprocess_embed(texts)
@@ -458,6 +477,7 @@ class EmbeddingService:
458
477
  "TOKENIZERS_PARALLELISM": "false",
459
478
  "TORCH_DEVICE": "cpu",
460
479
  }
480
+ from superlocalmemory.core.platform_utils import popen_platform_kwargs
461
481
  self._worker_proc = subprocess.Popen(
462
482
  [sys.executable, "-m", worker_module],
463
483
  stdin=subprocess.PIPE,
@@ -466,7 +486,7 @@ class EmbeddingService:
466
486
  text=True,
467
487
  bufsize=1,
468
488
  env=env,
469
- start_new_session=True,
489
+ **popen_platform_kwargs(),
470
490
  )
471
491
  # v3.4.13: Register PID for machine-wide singleton guard
472
492
  register_embedding_worker_pid(self._worker_proc.pid)
@@ -511,6 +531,68 @@ class EmbeddingService:
511
531
  self._idle_timer.start()
512
532
  self._last_used = time.time()
513
533
 
534
+ # ------------------------------------------------------------------
535
+ # OpenAI-compatible embedding (V3.4.24 — any /v1/embeddings endpoint)
536
+ # ------------------------------------------------------------------
537
+
538
+ def _get_http_client(self):
539
+ """Reusable httpx client for OpenAI-compatible endpoints."""
540
+ if self._http_client is None:
541
+ import httpx
542
+ self._http_client = httpx.Client(
543
+ timeout=httpx.Timeout(connect=5.0, read=30.0, write=10.0, pool=5.0),
544
+ )
545
+ return self._http_client
546
+
547
+ def _openai_compatible_embed_batch(
548
+ self, texts: list[str], *, max_retries: int = 3,
549
+ ) -> list[list[float]]:
550
+ """Encode via any OpenAI-compatible embedding API.
551
+
552
+ V3.4.24: Standard ``/v1/embeddings`` format. Works with Ollama,
553
+ vLLM, LiteLLM, text-embeddings-inference, and any endpoint that
554
+ implements the OpenAI embeddings spec.
555
+ """
556
+ endpoint = self._config.api_endpoint.rstrip("/")
557
+ if not endpoint.endswith("/embeddings"):
558
+ endpoint = f"{endpoint}/embeddings"
559
+ headers = {"Content-Type": "application/json"}
560
+ if self._config.api_key:
561
+ headers["Authorization"] = f"Bearer {self._config.api_key}"
562
+ body = {
563
+ "input": texts,
564
+ "model": self._config.model_name,
565
+ }
566
+
567
+ client = self._get_http_client()
568
+ last_error: Exception | None = None
569
+ for attempt in range(max_retries):
570
+ try:
571
+ resp = client.post(endpoint, headers=headers, json=body)
572
+ resp.raise_for_status()
573
+ data = resp.json()
574
+ if "data" not in data or not isinstance(data["data"], list):
575
+ raise ValueError(
576
+ f"Unexpected response: missing 'data' array. Keys: {list(data.keys())}"
577
+ )
578
+ results: list[list[float]] = []
579
+ for item in sorted(data["data"], key=lambda d: d["index"]):
580
+ results.append(item["embedding"])
581
+ if len(results) != len(texts):
582
+ logger.warning(
583
+ "Embedding count mismatch: sent %d texts, got %d vectors",
584
+ len(texts), len(results),
585
+ )
586
+ return results
587
+ except Exception as exc:
588
+ last_error = exc
589
+ if attempt < max_retries - 1:
590
+ time.sleep(2 ** attempt)
591
+ raise RuntimeError(
592
+ f"OpenAI-compatible embedding failed after {max_retries} retries: "
593
+ f"{last_error}"
594
+ )
595
+
514
596
  # ------------------------------------------------------------------
515
597
  # Cloud embedding (no subprocess needed — just HTTP)
516
598
  # ------------------------------------------------------------------
@@ -113,6 +113,14 @@ def init_embedder(config: SLMConfig) -> Any | None:
113
113
  return result
114
114
  return None
115
115
 
116
+ # --- V3.4.24: Explicit OpenAI-compatible provider ---
117
+ if provider == "openai" and emb_cfg.is_openai_compatible:
118
+ logger.info(
119
+ "Using OpenAI-compatible embedding endpoint: %s (model=%s, dim=%d)",
120
+ emb_cfg.api_endpoint, emb_cfg.model_name, emb_cfg.dimension,
121
+ )
122
+ return _try_service_embedder(EmbeddingService, emb_cfg)
123
+
116
124
  # --- Explicit cloud provider ---
117
125
  if provider == "cloud" or emb_cfg.is_cloud:
118
126
  return _try_service_embedder(EmbeddingService, emb_cfg)
@@ -0,0 +1,127 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under AGPL-3.0-or-later - see LICENSE file
3
+ # Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
4
+
5
+ """Cross-platform utilities for subprocess management and resource monitoring.
6
+
7
+ V3.4.24: Consolidates Windows/POSIX branching from 10+ files into one module.
8
+ Replaces the Unix-only ``resource`` module with ``psutil`` on Windows.
9
+ Inspired by community PR #14 (GuillaumeG / Tyrin451).
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import os
15
+ import subprocess
16
+ import sys
17
+ import threading
18
+
19
+
20
def popen_platform_kwargs() -> dict:
    """Return the OS-specific extra keyword arguments for ``subprocess.Popen``.

    Windows: ``{"creationflags": CREATE_NO_WINDOW}`` so the child does not
    pop up a console window.
    Elsewhere: ``{"start_new_session": True}`` so the child runs in its own
    session and terminal signals do not reach it.
    """
    if sys.platform != "win32":
        return {"start_new_session": True}
    # The attribute exists only in the Windows build of ``subprocess``;
    # 0x08000000 is the literal value of CREATE_NO_WINDOW.
    no_window = getattr(subprocess, "CREATE_NO_WINDOW", 0x08000000)
    return {"creationflags": no_window}
31
+
32
+
33
def get_rss_mb() -> float:
    """Report this process's resident set size in megabytes.

    Non-Windows: reads ``ru_maxrss`` from ``resource.getrusage`` — the
    maximum resident set size, not an instantaneous reading. The value is
    treated as bytes on macOS and kilobytes on other POSIX systems.
    Windows: reads ``psutil.Process().memory_info().rss``.

    Returns:
        The size in MB, or ``0.0`` when it cannot be measured (module
        missing or the query failed). Callers should treat 0.0 as
        "unknown".
    """
    if sys.platform == "win32":
        try:
            import psutil
            rss_bytes = psutil.Process().memory_info().rss
            return rss_bytes / 1024 / 1024
        except Exception:
            # Best effort: psutil may be absent or the query may fail.
            return 0.0

    try:
        import resource
        max_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
        if sys.platform == "darwin":
            return max_rss / 1024 / 1024  # macOS reports bytes
        return max_rss / 1024  # Linux reports kilobytes
    except Exception:
        return 0.0
53
+
54
+
55
def is_pid_alive(pid: int) -> bool:
    """Report whether a process with *pid* currently exists.

    POSIX: probes with ``os.kill(pid, 0)``. Signal 0 delivers nothing and
    only performs the existence/permission check. A ``PermissionError``
    means the process exists but belongs to another user, so it counts as
    alive.

    Windows: uses ``psutil.pid_exists`` when psutil is importable and
    otherwise queries the process through kernel32. ``os.kill`` is never
    used there: on Windows any signal other than CTRL_C_EVENT /
    CTRL_BREAK_EVENT terminates the target instead of probing it.

    Args:
        pid: Process id to check. Values <= 0 are never alive.

    Returns:
        True if the process exists, False otherwise.
    """
    if pid <= 0:
        return False
    if sys.platform != "win32":
        try:
            os.kill(pid, 0)
        except PermissionError:
            # EPERM: the process is there, we just may not signal it.
            return True
        except (OSError, OverflowError):
            # ESRCH (no such process) or a pid too large for the OS.
            return False
        return True
    try:
        import psutil
    except ImportError:
        return _win32_pid_alive(pid)
    return psutil.pid_exists(pid)


def _win32_pid_alive(pid: int) -> bool:
    """Check process existence via kernel32 without signalling the process.

    Returns True when the state cannot be determined, so that callers such
    as the parent watchdog do not act on a false "dead" reading.
    """
    # Win32 constants (winnt.h / winerror.h / minwinbase.h).
    process_query_limited_information = 0x1000
    error_access_denied = 5
    still_active = 259
    try:
        import ctypes
        from ctypes import wintypes

        kernel32 = ctypes.WinDLL("kernel32", use_last_error=True)
    except (ImportError, AttributeError, OSError):
        return True

    # Declare prototypes so 64-bit HANDLE values are not truncated to int.
    kernel32.OpenProcess.argtypes = (
        wintypes.DWORD, wintypes.BOOL, wintypes.DWORD,
    )
    kernel32.OpenProcess.restype = wintypes.HANDLE
    kernel32.GetExitCodeProcess.argtypes = (
        wintypes.HANDLE, ctypes.POINTER(wintypes.DWORD),
    )
    kernel32.GetExitCodeProcess.restype = wintypes.BOOL
    kernel32.CloseHandle.argtypes = (wintypes.HANDLE,)
    kernel32.CloseHandle.restype = wintypes.BOOL

    handle = kernel32.OpenProcess(
        process_query_limited_information, False, pid,
    )
    if not handle:
        # Access denied still proves the process exists.
        return ctypes.get_last_error() == error_access_denied
    try:
        exit_code = wintypes.DWORD()
        if not kernel32.GetExitCodeProcess(handle, ctypes.byref(exit_code)):
            return True
        return exit_code.value == still_active
    finally:
        kernel32.CloseHandle(handle)
78
+
79
+
80
def kill_process(pid: int) -> bool:
    """Ask the process *pid* to terminate.

    POSIX: sends ``SIGTERM`` with ``os.kill``.
    Windows: runs ``taskkill /F /T /PID <pid>``, which force-kills the
    process together with its child tree.

    Args:
        pid: Target process id. Values <= 0 are rejected.

    Returns:
        True if the termination request succeeded (signal delivered, or
        ``taskkill`` exited with status 0); False otherwise, including
        when the process does not exist.
    """
    if pid <= 0:
        return False
    if sys.platform == "win32":
        try:
            exit_status = subprocess.call(
                ["taskkill", "/F", "/T", "/PID", str(pid)],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
        except (OSError, subprocess.SubprocessError):
            return False
        # taskkill exits non-zero when the pid is unknown or access is
        # denied; only a zero status means the kill actually happened.
        return exit_status == 0

    import signal

    try:
        os.kill(pid, signal.SIGTERM)
    except (OSError, OverflowError):
        return False
    return True
103
+
104
+
105
def start_parent_watchdog() -> None:
    """Launch a daemon thread that exits this process once its parent is gone.

    The parent pid is captured once at call time. Every 5 seconds the
    thread asks ``is_pid_alive`` about it and calls ``os._exit(0)`` as soon
    as the answer is False. Nothing is started when ``os.getppid`` is
    unavailable or the parent pid is <= 1.

    Prevents orphaned workers (500+ MB each) after parent crash/kill.
    V3.3.7 origin: 33 GB consumed by orphaned workers.
    V3.4.24: Consolidated from 3 separate worker files.
    """
    getppid = getattr(os, "getppid", None)
    if getppid is None:
        return
    watched_pid = getppid()
    if watched_pid <= 1:
        return

    def _poll_parent() -> None:
        from time import sleep
        while True:
            sleep(5)
            if is_pid_alive(watched_pid):
                continue
            # Hard exit: skip interpreter cleanup so the worker cannot linger.
            os._exit(0)

    watcher = threading.Thread(
        target=_poll_parent, daemon=True, name="parent-watchdog",
    )
    watcher.start()
@@ -20,7 +20,6 @@ import json
20
20
  import os
21
21
  import signal
22
22
  import sys
23
- import threading
24
23
 
25
24
  # Force CPU BEFORE any torch import
26
25
  os.environ["CUDA_VISIBLE_DEVICES"] = ""
@@ -39,24 +38,10 @@ if sys.platform != "win32":
39
38
  def _start_parent_watchdog() -> None:
40
39
  """Monitor parent process — self-terminate if parent dies.
41
40
 
42
- Prevents orphaned workers that consume 500+ MB each when the parent
43
- process crashes, is killed, or exits without cleanup.
44
-
45
- V3.3.7: Added after incident where orphaned workers consumed 33 GB.
41
+ V3.4.24: Delegates to platform_utils.start_parent_watchdog().
46
42
  """
47
- parent_pid = os.getppid()
48
-
49
- def _watch() -> None:
50
- import time
51
- while True:
52
- time.sleep(5)
53
- try:
54
- os.kill(parent_pid, 0)
55
- except OSError:
56
- os._exit(0)
57
-
58
- t = threading.Thread(target=_watch, daemon=True, name="parent-watchdog")
59
- t.start()
43
+ from superlocalmemory.core.platform_utils import start_parent_watchdog
44
+ start_parent_watchdog()
60
45
 
61
46
  _engine = None
62
47
 
@@ -253,7 +238,8 @@ def _handle_status() -> dict:
253
238
 
254
239
  def _worker_main() -> None:
255
240
  """Main loop: read JSON requests from stdin, write responses to stdout."""
256
- _start_parent_watchdog() # V3.3.7: self-terminate if parent dies
241
+ _start_parent_watchdog()
242
+ from superlocalmemory.core.platform_utils import get_rss_mb
257
243
 
258
244
  for line in sys.stdin:
259
245
  line = line.strip()
@@ -326,11 +312,9 @@ def _worker_main() -> None:
326
312
  except Exception as exc:
327
313
  _respond({"ok": False, "error": str(exc)})
328
314
 
329
- # V3.3.16: RSS watchdog — self-terminate if memory exceeds 1.5GB.
330
- # Parent auto-respawns a fresh worker on next request.
331
- import resource
332
- rss_mb = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024 / 1024
333
- if rss_mb > 2500:
315
+ # V3.3.16: RSS watchdog — V3.4.24: cross-platform via platform_utils.
316
+ rss_mb = get_rss_mb()
317
+ if rss_mb > 0 and rss_mb > 2500:
334
318
  sys.exit(0)
335
319
 
336
320
 
@@ -31,7 +31,6 @@ import platform
31
31
  import signal
32
32
  import struct
33
33
  import sys
34
- import threading
35
34
 
36
35
  # Force CPU BEFORE any torch import
37
36
  os.environ["CUDA_VISIBLE_DEVICES"] = ""
@@ -52,25 +51,10 @@ if sys.platform != "win32":
52
51
  def _start_parent_watchdog() -> None:
53
52
  """Monitor parent process — self-terminate if parent dies.
54
53
 
55
- Prevents orphaned workers that consume 1+ GB each when the parent
56
- process crashes, is killed, or exits without cleanup.
57
-
58
- V3.3.7: Added after incident where ~30 orphaned workers consumed 33 GB.
54
+ V3.4.24: Delegates to platform_utils.start_parent_watchdog().
59
55
  """
60
- parent_pid = os.getppid()
61
-
62
- def _watch() -> None:
63
- import time
64
- while True:
65
- time.sleep(5)
66
- try:
67
- os.kill(parent_pid, 0) # Check if parent is alive (signal 0)
68
- except OSError:
69
- # Parent is dead — self-terminate
70
- os._exit(0)
71
-
72
- t = threading.Thread(target=_watch, daemon=True, name="parent-watchdog")
73
- t.start()
56
+ from superlocalmemory.core.platform_utils import start_parent_watchdog
57
+ start_parent_watchdog()
74
58
 
75
59
 
76
60
  def _detect_onnx_variant(model_name: str = "") -> str:
@@ -101,7 +85,8 @@ def _detect_onnx_variant(model_name: str = "") -> str:
101
85
 
102
86
  def _worker_main() -> None:
103
87
  """Main loop: read JSON requests from stdin, write responses to stdout."""
104
- _start_parent_watchdog() # V3.3.7: self-terminate if parent dies
88
+ _start_parent_watchdog()
89
+ from superlocalmemory.core.platform_utils import get_rss_mb
105
90
 
106
91
  model = None
107
92
  active_backend = ""
@@ -194,10 +179,9 @@ def _worker_main() -> None:
194
179
  except Exception as exc:
195
180
  _respond({"ok": False, "error": str(exc)})
196
181
 
197
- # V3.3.16: RSS watchdog — same as embedding_worker
198
- import resource
199
- rss_mb = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024 / 1024
200
- if rss_mb > 2500:
182
+ # V3.3.16: RSS watchdog — V3.4.24: cross-platform via platform_utils.
183
+ rss_mb = get_rss_mb()
184
+ if rss_mb > 0 and rss_mb > 2500:
201
185
  sys.exit(0)
202
186
 
203
187
  continue
@@ -247,6 +247,7 @@ class WorkerPool:
247
247
  "TOKENIZERS_PARALLELISM": "false",
248
248
  "TORCH_DEVICE": "cpu",
249
249
  }
250
+ from superlocalmemory.core.platform_utils import popen_platform_kwargs
250
251
  self._proc = subprocess.Popen(
251
252
  [sys.executable, "-m", "superlocalmemory.core.recall_worker"],
252
253
  stdin=subprocess.PIPE,
@@ -255,7 +256,7 @@ class WorkerPool:
255
256
  text=True,
256
257
  bufsize=1,
257
258
  env=env,
258
- start_new_session=True, # Prevent terminal signals bleeding to worker
259
+ **popen_platform_kwargs(),
259
260
  )
260
261
  logger.info("Recall worker spawned (PID %d)", self._proc.pid)
261
262
  except Exception as exc:
@@ -193,6 +193,7 @@ class CrossEncoderReranker:
193
193
  "TOKENIZERS_PARALLELISM": "false",
194
194
  "TORCH_DEVICE": "cpu",
195
195
  }
196
+ from superlocalmemory.core.platform_utils import popen_platform_kwargs
196
197
  self._worker_proc = subprocess.Popen(
197
198
  [sys.executable, "-m", worker_module],
198
199
  stdin=subprocess.PIPE,
@@ -201,7 +202,7 @@ class CrossEncoderReranker:
201
202
  text=True,
202
203
  bufsize=1,
203
204
  env=env,
204
- start_new_session=True,
205
+ **popen_platform_kwargs(),
205
206
  )
206
207
  # v3.4.13: Register PID for machine-wide singleton
207
208
  _RERANKER_PID_FILE.parent.mkdir(parents=True, exist_ok=True)